setwd("~/HomeWork/Practice")
devtools::install_github("kassambara/datarium")
## Skipping install of 'datarium' from a github remote, the SHA1 (f9f9b3a6) has not changed since last install.
##   Use `force = TRUE` to force installation
data("marketing", package="datarium")
head(marketing)
data("swiss")
head(swiss)
data("Boston", package="MASS")
head(Boston)

Chapter 3 Regression

Loading Required R packages

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.1     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
theme_set(theme_bw())

Preparing the data

# Load the data
data("marketing", package = "datarium")
# Inspect th data
sample_n(marketing,3)

Split the data into training and test set

set.seed(123)
training.samples <- marketing$sales %>% createDataPartition(p=0.8, list = FALSE)

train.data <- marketing[training.samples,]
test.data <- marketing[-training.samples,]
summary(train.data)
##     youtube          facebook       newspaper          sales      
##  Min.   :  0.84   Min.   : 0.00   Min.   :  0.36   Min.   : 1.92  
##  1st Qu.: 85.95   1st Qu.:11.61   1st Qu.: 19.11   1st Qu.:12.48  
##  Median :196.08   Median :25.44   Median : 35.16   Median :15.48  
##  Mean   :179.24   Mean   :27.51   Mean   : 39.07   Mean   :16.77  
##  3rd Qu.:264.54   3rd Qu.:43.89   3rd Qu.: 55.02   3rd Qu.:20.88  
##  Max.   :355.68   Max.   :59.52   Max.   :136.80   Max.   :32.40
dim(train.data)
## [1] 162   4
dim(test.data)
## [1] 38  4

Computing linear regression in r

model <- lm(sales ~., data = train.data)
summary(model)
## 
## Call:
## lm(formula = sales ~ ., data = train.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.4122  -1.1101   0.3475   1.4218   3.4987 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.391883   0.440622   7.698 1.41e-12 ***
## youtube     0.045574   0.001592  28.630  < 2e-16 ***
## facebook    0.186941   0.009888  18.905  < 2e-16 ***
## newspaper   0.001786   0.006773   0.264    0.792    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.119 on 158 degrees of freedom
## Multiple R-squared:  0.8902, Adjusted R-squared:  0.8881 
## F-statistic: 426.9 on 3 and 158 DF,  p-value: < 2.2e-16
# Make predictios
predictions <- model %>% predict(test.data)

# Model Performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$sales)
## [1] 1.590096
# (b) R-square
R2(predictions, test.data$sales)
## [1] 0.9380644

Non-Linear Regression

data("Boston", package = "MASS")
set.seed(123)

head(Boston)
training1.samples <- Boston$medv %>% createDataPartition(p=0.8, list = FALSE)

train1.data <- Boston[training.samples,]
test1.data <- Boston[-training.samples, ]

ggplot(train1.data, aes(lstat, medv)) +
  geom_point() +
  stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Let’s start with fitting a linear regression model

model.lm <- lm(medv~lstat, data = train1.data)
summary(model.lm)
## 
## Call:
## lm(formula = medv ~ lstat, data = train1.data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.889 -3.805 -1.656  2.059 19.972 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33.98512    0.88151   38.55   <2e-16 ***
## lstat       -0.88925    0.06524  -13.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.548 on 160 degrees of freedom
## Multiple R-squared:  0.5373, Adjusted R-squared:  0.5344 
## F-statistic: 185.8 on 1 and 160 DF,  p-value: < 2.2e-16
prediction <- model.lm %>% predict(test1.data)

data.frame(RMSE = RMSE(prediction, test1.data$medv),
           R2 = R2(prediction, test1.data$medv))

Let’s visualize the data

ggplot(train1.data, aes(lstat, medv)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ x)

Polynomial regression

medv = b0 + b1 ∗ lstat + b2 ∗ lstat2

mod2 <- lm(medv ~ lstat + I(lstat*lstat), data = train1.data)
summary(mod2)
## 
## Call:
## lm(formula = medv ~ lstat + I(lstat * lstat), data = train1.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3393 -3.2427 -0.5934  2.0975 16.7311 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      43.432475   1.274260  34.084  < 2e-16 ***
## lstat            -2.520720   0.189206 -13.323  < 2e-16 ***
## I(lstat * lstat)  0.053205   0.005921   8.987 7.09e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.532 on 159 degrees of freedom
## Multiple R-squared:  0.6932, Adjusted R-squared:  0.6893 
## F-statistic: 179.6 on 2 and 159 DF,  p-value: < 2.2e-16
prediction2 <- mod2 %>% predict(test1.data)
data.frame(
  RMSE = RMSE(prediction2, test1.data$medv),
  R2 = R2(prediction2, test1.data$medv)
)
mod22 <- lm(medv ~ poly(lstat,2), data = train1.data)
summary(mod22)
## 
## Call:
## lm(formula = medv ~ poly(lstat, 2), data = train1.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3393 -3.2427 -0.5934  2.0975 16.7311 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      23.5407     0.3561  66.116  < 2e-16 ***
## poly(lstat, 2)1 -75.6187     4.5318 -16.686  < 2e-16 ***
## poly(lstat, 2)2  40.7253     4.5318   8.987 7.09e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.532 on 159 degrees of freedom
## Multiple R-squared:  0.6932, Adjusted R-squared:  0.6893 
## F-statistic: 179.6 on 2 and 159 DF,  p-value: < 2.2e-16
prediction22 <- mod22 %>% predict(test1.data)
data.frame(
  RMSE = RMSE(prediction22, test1.data$medv),
  R2 = R2(prediction22, test1.data$medv)
)
ggplot(train1.data, aes(lstat, medv)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ poly(x,2))

ggplot(train1.data, aes(lstat, medv)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ (x + I(x*x)))

mod6 <- lm(medv ~ poly(lstat, 6), data = train1.data)
summary(mod6)
## 
## Call:
## lm(formula = medv ~ poly(lstat, 6), data = train1.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.4412  -2.3739  -0.7595   1.7344  15.9725 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      23.5407     0.3304  71.245  < 2e-16 ***
## poly(lstat, 6)1 -75.6187     4.2055 -17.981  < 2e-16 ***
## poly(lstat, 6)2  40.7253     4.2055   9.684  < 2e-16 ***
## poly(lstat, 6)3 -16.9822     4.2055  -4.038 8.45e-05 ***
## poly(lstat, 6)4  12.1006     4.2055   2.877  0.00458 ** 
## poly(lstat, 6)5  -9.1610     4.2055  -2.178  0.03089 *  
## poly(lstat, 6)6  -2.2956     4.2055  -0.546  0.58595    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.206 on 155 degrees of freedom
## Multiple R-squared:  0.7424, Adjusted R-squared:  0.7324 
## F-statistic: 74.45 on 6 and 155 DF,  p-value: < 2.2e-16

As we can see in above model summary the polynomial term beyond 5 is not significant

Let’s visualize model

ggplot(train1.data, aes(lstat, medv)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ poly(x,6))

mod5 <- lm(medv ~ poly(lstat, 5), data = train1.data)
summary(mod5)
## 
## Call:
## lm(formula = medv ~ poly(lstat, 5), data = train1.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.0370  -2.4259  -0.7941   1.6414  16.1361 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      23.5407     0.3297  71.406  < 2e-16 ***
## poly(lstat, 5)1 -75.6187     4.1961 -18.021  < 2e-16 ***
## poly(lstat, 5)2  40.7253     4.1961   9.706  < 2e-16 ***
## poly(lstat, 5)3 -16.9822     4.1961  -4.047 8.14e-05 ***
## poly(lstat, 5)4  12.1006     4.1961   2.884  0.00448 ** 
## poly(lstat, 5)5  -9.1610     4.1961  -2.183  0.03051 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.196 on 156 degrees of freedom
## Multiple R-squared:  0.7419, Adjusted R-squared:  0.7336 
## F-statistic: 89.69 on 5 and 156 DF,  p-value: < 2.2e-16
ggplot(train1.data, aes(lstat, medv)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ poly(x,5))

Using log transformation

mod.log <- lm(medv ~ log(lstat), data = train1.data)
summary(mod.log)
## 
## Call:
## lm(formula = medv ~ log(lstat), data = train1.data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.610 -2.975 -1.048  1.886 17.093 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  50.2130     1.4077   35.67   <2e-16 ***
## log(lstat)  -11.5918     0.5928  -19.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.43 on 160 degrees of freedom
## Multiple R-squared:  0.705,  Adjusted R-squared:  0.7032 
## F-statistic: 382.4 on 1 and 160 DF,  p-value: < 2.2e-16
prediction.log <- mod.log %>% predict(test1.data)

data.frame(
  RMSE = RMSE(prediction.log, test1.data$medv),
  R2 = R2(prediction.log, test1.data$medv)
)
ggplot(data = train1.data, aes(lstat, medv))+
  geom_point() +
  stat_smooth(method = lm, formula = y~log(x))

Spline Regression

library(splines)
knots <- quantile(train1.data$lstat, p = c(0.25, 0.5, 0.75))
model.spline <- lm(medv ~bs(lstat,knots=knots), data = train1.data)
summary(model.spline)
## 
## Call:
## lm(formula = medv ~ bs(lstat, knots = knots), data = train1.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.0823 -2.0512 -0.6894  1.8034 15.8245 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 44.032      2.643  16.658  < 2e-16 ***
## bs(lstat, knots = knots)1   -1.127      4.160  -0.271    0.787    
## bs(lstat, knots = knots)2  -22.744      2.831  -8.034 2.23e-13 ***
## bs(lstat, knots = knots)3  -19.776      3.102  -6.376 1.98e-09 ***
## bs(lstat, knots = knots)4  -33.442      3.570  -9.366  < 2e-16 ***
## bs(lstat, knots = knots)5  -27.995      4.901  -5.713 5.53e-08 ***
## bs(lstat, knots = knots)6  -29.583      4.500  -6.574 7.08e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.097 on 155 degrees of freedom
## Multiple R-squared:  0.7555, Adjusted R-squared:  0.746 
## F-statistic: 79.82 on 6 and 155 DF,  p-value: < 2.2e-16
prediction.spline <- model.spline %>% predict(test1.data)
## Warning in bs(lstat, degree = 3L, knots = c(`25%` = 6.5425, `50%` =
## 10.285, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
data.frame(
  RMSE = RMSE(prediction.spline, test1.data$medv),
  R2 = R2(prediction.spline, test1.data$medv)
)
ggplot(train1.data, aes(lstat, medv)) + 
  geom_point() + 
  stat_smooth(method = lm, formula = y~bs(x,df=3))

Generalized additive models

library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-27. For overview type 'help("mgcv-package")'.
model.additive <- gam(medv ~ s(lstat), data = train1.data)
summary(model.additive)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## medv ~ s(lstat)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  23.5407     0.3261   72.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df     F p-value    
## s(lstat) 5.825   7.01 65.16  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.739   Deviance explained = 74.9%
## GCV =  17.99  Scale est. = 17.232    n = 162
prediction.additive <- model.additive %>% predict(test1.data)

data.frame(
  RMSE = RMSE(prediction.additive, test1.data$medv),
  R2 = R2(prediction.additive, test1.data$medv)
)
ggplot(train1.data, aes(lstat, medv)) +
  geom_point() +
  stat_smooth(method = gam, formula = y ~ s(x))

Regression Model Accuracy Metrics

Model performance metrics

In regression model, the most commonly known evaluation metrics include:

  1. R-squared (R2), which is the proportion of variation in the outcome that is explained by the predictor variables. In multiple regression models, R2 corresponds to the squared correlation between the observed outcome values and the predicted values by the model. The Higer the R-squared, the better the model.

  2. Root Mean Squared Error (RMSE), which measures the average error performed by the model in the predicting the outcome for an observation. Mathematically, the RMSE is the square root of the mean squared error (MSE), which is the average squared difference between the observed actual outcome values and the values predicted by the model. So, MSE = mean((observeds - predicteds)^2) and RMSE = sqrt(MSE). The lower the RMSE, the better the model.

  3. Residual Standard Error (RSE), also known as the model sigma, is a variant of the RMSE adjusted for the number of predictors in the model. The lower the RSE, the better the model. In practice, the difference between RMSE and RSE is very small, particularly for large multivariate data.

  4. Mean Absolute Error (MAE), like the RMSE, the MAE measures the prediction error. Mathematically, it is the average absolute difference between obsered and predicted out-comes, MAE = mean(abs(observeds - predicteds)) . MAE is less sensitive to outliers compared to RMSE.

The problem with the above metrics, is that they are sensible to the inclusion of additional variables in the model, even if those variables don’t have significant contribution in explaining the outcome. Put in other words, including additional variables in the model will always increase the R2 and reduce the RMSE. So, we need a more robust metric to guide the model choice.

Concerning R2, there is an adjusted version, called Adjusted R-squared, which adjusts the R2 for having too many variables in the model.

Additionally, there are four other important metrics - AIC, AICc, BIC and Mallows Cp - tha are commonly used for model evaluation and selection. These are an unbiased estimate of the model prediction error MSE. The lower these metrics, he better the model.

  1. AIC stands for (Akike’s Information Criteria), a metric developeed by the Japanese Statistician, Hirotugu Akaike, 1970. The basic idea of AIC is to penalize the inclusion of additional variables to a model. It adds a penalty that increases the error when including additional terms. The lowwer the AIC, the better the model.

  2. AICc is a version of AIC corrected for small sample sizes.

  3. BIC (or Bayesian information criteria) is a variant of AIC with a strong penalty for including additional variables to the model.

  4. Mallows Cp: Avariant of AIC developed by Colin Mallows.

Generally, most commonly used metrics, for measuring regression model quality and comparing models, are: Adjusted R2, AIC, BIC and Cp

In the following sections, we’ll sho you how to compute these above mentioned metrics.

library(modelr)
library(broom)
## 
## Attaching package: 'broom'
## The following object is masked from 'package:modelr':
## 
##     bootstrap
# Load the data
data("swiss")

# Inspect the data
sample_n(swiss,3)

Building regression models

We start by creating two models:

  1. Model 1, including all predictors
  2. Model 2, including all predictors except the variable Examination
model.swiss.1 <- lm(Fertility~., data = swiss)
summary(model.swiss.1)
## 
## Call:
## lm(formula = Fertility ~ ., data = swiss)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.2743  -5.2617   0.5032   4.1198  15.3213 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      66.91518   10.70604   6.250 1.91e-07 ***
## Agriculture      -0.17211    0.07030  -2.448  0.01873 *  
## Examination      -0.25801    0.25388  -1.016  0.31546    
## Education        -0.87094    0.18303  -4.758 2.43e-05 ***
## Catholic          0.10412    0.03526   2.953  0.00519 ** 
## Infant.Mortality  1.07705    0.38172   2.822  0.00734 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared:  0.7067, Adjusted R-squared:  0.671 
## F-statistic: 19.76 on 5 and 41 DF,  p-value: 5.594e-10
data.frame(
  R2 = rsquare(model.swiss.1, data = swiss),
  RMSE = rmse(model.swiss.1, data = swiss),
  MAE = mae(model.swiss.1, data = swiss),
  AIC = AIC(model.swiss.1),
  BIC = BIC(model.swiss.1)
)
model.swiss.2 <- lm(Fertility~. - Examination, data = swiss)

summary(model.swiss.2)
## 
## Call:
## lm(formula = Fertility ~ . - Examination, data = swiss)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6765  -6.0522   0.7514   3.1664  16.1422 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      62.10131    9.60489   6.466 8.49e-08 ***
## Agriculture      -0.15462    0.06819  -2.267  0.02857 *  
## Education        -0.98026    0.14814  -6.617 5.14e-08 ***
## Catholic          0.12467    0.02889   4.315 9.50e-05 ***
## Infant.Mortality  1.07844    0.38187   2.824  0.00722 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.168 on 42 degrees of freedom
## Multiple R-squared:  0.6993, Adjusted R-squared:  0.6707 
## F-statistic: 24.42 on 4 and 42 DF,  p-value: 1.717e-10
data.frame(
  R2 = rsquare(model.swiss.2, data = swiss),
  RMSE = rmse(model.swiss.2, data = swiss),
  MAE = mae(model.swiss.2, data = swiss),
  AIC = AIC(model.swiss.2),
  BIC = BIC(model.swiss.2)
)
glance(model.swiss.1)
glance(model.swiss.2)

Manual computation of R2, RMSE and MAE

swiss %>% add_predictions(model.swiss.1) %>% summarise(
  R2 = cor(Fertility, pred)^2,
  MSE = mean(Fertility - pred)^2,
  RMSE = sqrt(MSE),
  MAE = mean(abs(Fertility - pred))
)
  • Comparing regression models performance
glance(model.swiss.1) %>% select(adj.r.squared, sigma, AIC, BIC, p.value)
glance(model.swiss.2) %>% select(adj.r.squared, sigma, AIC, BIC, p.value)

From the output above, it can be seen that:

  1. The two models have exactly the samed Adjusted R2 (0.67), meaning that they are equivalent in explaining the outcome, here fertility score. Additionally, they have the same amount of residual standard error (RSE or sigma = 7.17). However, the model 2 is more simple than model 1 because it incorporates less variables. All things equal, the simple model is always better in statistics.

  2. The AIC and the BIC of the model 2 are lower than those of the model1. In model comparison strategies, the model with the lowest AIC and BIC score is preferred.

  3. Finally, the F-statistic p.value of the model 2 is lower than the one of the model 1. This means that the model 2 is statistically more significant compared to model 1, which is consistent to the avove conclusion.

Note that, the RMSE and the RSE are measured in the same scale as the outcome variable. Dividing the RSE by the average value of the outcome variable will give you the prediction error rate, which should be as small as possible:

sigma(model.swiss.1)/mean(swiss$Fertility)
## [1] 0.1021544

In our example the average prediction error rate is 10%

Cross-validation

Cross-validation refers to a set of methods for measuring the performance of a given predictive model on new test data sets.

The basic idea, behind cross-validation techniques, consists of dividing the data into two sets:

  1. The training set, used to train (i.e. build) the model;
  2. and the testing set (or validation set), used to test (i.e. validate) the model by estimating the prediction error.

Cross-validation is also known as a resampling method because it involves fitting the same statistical method multiple times using different subsets of the data.

  1. The different cross-validation methods for assessing model performance. We cover the following approches:

    • validation set approach (or data split)
    • Leave On Out Cross Validation
    • k-fold Cross Validation
    • Repeated k-fold Cross Validation

Model performance metrics

After building a model, we are interested in determining the accuracy of this model on predicting the outcome for new unseen observations not used to build the model. Put in other words, we want to estimate the prediction error.

To do so, the basic strategy is to:

  1. Build the model on a training data set
  2. Apply the model on a new test data set to make predictions
  3. Compute the prediction errors

Cross-validation methods

Briefly, cross-validation algorithms can be summarized as follow:

  1. Reserve a small sample of the data set
  2. Build (or train) the model using the remaining part of the data set
  3. Test the effectiveness of the model on the reserved sample of the data set. If the model works well on the test data set, then it’s good.

The following sections describe the diffferent cross-validation techniques.

1. The Validation set Approach

The validation set approach consists of randomly splitting the data into two sets: one set is used to train the model and the remaining other set is used to test the model.

The process works as follow:

  1. Build (train) the model on the training data set
  2. Appply the model to the test data set to predict the outcome of new unseen observations
  3. Quantify the prediction error as the mean squared difference between the observed and the predicted outcome values.

The example below splits the swiss data set so that 80% is used for training a linear regression model and 20% is used to evaluate the model performance.

# Split the data into training and test set 
set.seed(123)

training.samples <- swiss$Fertility %>%
  createDataPartition(p=0.8, list = FALSE)

train.swiss.data <- swiss[training.samples, ]
test.swiss.data <- swiss[-training.samples, ]

# Build the model

model.swiss.lm <- lm(Fertility ~ ., data = train.swiss.data)

# Make predictions and compute the R2, RMSE and MAE
predictions.swiss.lm <- model.swiss.lm %>%  predict(test.swiss.data)

data.frame(
  R2 = R2(predictions.swiss.lm, test.swiss.data$Fertility),
  RMSE = RMSE(predictions.swiss.lm, test.swiss.data$Fertility),
  MAE = MAE(predictions.swiss.lm, test.swiss.data$Fertility)
)

When comparing two models, the one that produces the lowest test sample RMSE is the preferred model.

The RMSE and the MAE are measured in the same scale as the outcome variable. Dividing the RMSE by the average value of the outcome variable will give you the prediction error rate, which sould be as small as possible:

RMSE(predictions.swiss.lm, test.swiss.data$Fertility)/mean(test.swiss.data$Fertility)
## [1] 0.1281514

Note that, the validation set method is only useful when you have a large data set that can be partitioned. A disadvantage is that we build a model on a fraction of the data set only, possibly leaving out some interesting information about data, leading to higher bias. Therefore, the test error rate can be highly variable, depending on which observations are included in the training set and which observation are included in the validation set.

2. Leave one out cross validation - LOOCV

This method works as follows: 1. Leave out one data point and build the model on the rest of the data set 2. Test the model against the data point that is left out at step 1 and record the test error associated with the prediction 3. Repeat the process for all data points 4. Compoute the overall prediction error by taking the average of all these test error estimates recoreded at step3.

Practical example in R using the caret package:

# Define training control
train.control <- trainControl(method = "LOOCV")

# Train the model

model.loocv <- train(Fertility ~., data = swiss, method = "lm", trControl = train.control)
# Summarize the results
print(model.loocv)
## Linear Regression 
## 
## 47 samples
##  5 predictor
## 
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation 
## Summary of sample sizes: 46, 46, 46, 46, 46, 46, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   7.738618  0.6128307  6.116021
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

The advantage of the LOOCV method is that we make use all data points reducing potential bias.

However, the process is repeated as many times as there are data points, resulting to a higher exection time when n is extreamely large.

Additionally, we test the model performance against one data, point at each iteration. This might result to higher variation in the prediction error, if some data points are outliers. So, we need a good ratio of testing data points, a solution provided by the k-fold cross-validation method.

K-fold cross-validation

The k-fold cross validation method evaluates the model performance on different subset of the training data and then calculate the average prediction error rate. The algorithm is as follow:

  1. Randomly split the data into k-subset (or k-fold) (for example 5 subsets)
  2. Reserve on subset and train the model on all other subsets
  3. Test the model on the reserved subset and record the prediction error
  4. Repeat this procss untill each of the k subsets has served as the test set.
  5. Compute the average of the k recorded errors. This is called the cross validation error serving as the performance metric for the model

K-fold cross-validation (CV) is a robust method for estimating the accuracy of a model. The most obvious advantage of k-fold CV compared to LOOCV is computational. A less obvious but potentially more important advantage of k-fold CV is that it often fives more accurate estimates of the test error rate than does LOOCV

In practice, one typically performs k-fold cross-validation using k = 5 or k = 10, as these values have been shown empirically to yield test error rate estimates that suffer neither from excessively high bias nor from very high variance.

The following example uses 10-fold cross validation to estimate the prediction error. Make sure to set seed for reproducibility.

# Define training control
set.seed(123)

train.control <- trainControl(method = "cv", number = 10)

# Train the model
model.cv <- train(Fertility~., data = swiss, method = "lm", trControl = train.control)

# Summarize the results
print(model.cv)
## Linear Regression 
## 
## 47 samples
##  5 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 43, 42, 42, 41, 43, 41, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   7.379432  0.7512317  6.032731
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

4. Repeated K-fold cross-validation

The process of splitting the data into k-folds can be repeated a number of times, this is called repeated k-fold cross validation.

Bootstrap procedure

The bootstrap method is used to quantify the uncertainty with a given statistical estimator or with a predictive model.

It consists of randomly selecting a sample of n observations from the original data set. This subset, called bootstrap data set is then used to evaluate the model.

This procedure is repeated a large number of times and the standard error of the bootstrap estimate is then calculated. The results provide an indication of the variance of the model performance.

Note that, the sampling is performed with replacement, which means that the same observation can occur more than once in the boostrrap data set.

Evaluating a predictive model performance

# Define training control

train.control.bootstrap <- trainControl(method = "boot", number = 100)

model.bootstrap <- train(Fertility ~., data = swiss, method = "lm", trControl = train.control.bootstrap)

# Summarize the results
print(model.bootstrap)
## Linear Regression 
## 
## 47 samples
##  5 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (100 reps) 
## Summary of sample sizes: 47, 47, 47, 47, 47, 47, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   8.432357  0.6048287  6.791066
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

Quantifying an estimator uncertainty and confidence intervals

The bootstrap approach can be used to quantify the uncertainty (or standard error) associated with any given statistical estimator.

For example, you might want to estimate the accuracy of the linear regression beta coefficients using bootstrap method.

The different steps are as follows:

  1. Create a simple function, model_coef(), that takes the swiss data set as well as the indices for the observations, and returns the regression coefficients.
  2. Apply the function boot_fun() to the full data set of 47 observations in order to compute the coefficients

We start by creating a function that retrns the regression model coefficeients:

model_coef <- function(data, index) {
  coef(lm(Fertility~., data = data, subset = index))
}

model_coef(swiss, 1:47)
##      (Intercept)      Agriculture      Examination        Education 
##       66.9151817       -0.1721140       -0.2580082       -0.8709401 
##         Catholic Infant.Mortality 
##        0.1041153        1.0770481
library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
## 
##     melanoma
boot(swiss, model_coef, 500)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = swiss, statistic = model_coef, R = 500)
## 
## 
## Bootstrap Statistics :
##       original       bias    std. error
## t1* 66.9151817  0.572138874 10.84264891
## t2* -0.1721140 -0.004654338  0.06456320
## t3* -0.2580082 -0.027889510  0.25405701
## t4* -0.8709401  0.002319556  0.21736743
## t5*  0.1041153 -0.002166432  0.03067023
## t6*  1.0770481  0.008581232  0.43205390

In the output above,

  • original column corresponds to the regression coefficients. The associated standard errors are given in the column std.error .

  • t1 corresponds to the intercept, t2 conrresponds to *Agriculture** and so on ..

For example, it can be observe that, the standard error (SE) of the regression coefficient associated with Agriculture is 0.06

Note that, the standard errors measure the variability/accuracy of the beta coefficients. It can be used to compute the confidence intervals of the coefficients.

For example, the 95% confidence interval for a given coefficient b is defined as b+/-1.96*SE(b), where:

  • The lower limits of b = b-1.96*SE(b) = -0.172 - (1.96*0.0680) = -0.306(for Agriculture variable)
  • The Upper limits of b = b+1.96*SE(b) = -0.172 + (1.96*0.0680) = -0.03743121(for Agriculture variable)

That is, there is approximately a 95% chance that the interval[-0.306,-0.036] will contain the true value of the coefficient.

Using the standard lm() function gives a slightly different standard errors, because the linear model make some assumptions about the data:

summary(lm(Fertility ~., data = swiss))$coef
##                    Estimate  Std. Error   t value     Pr(>|t|)
## (Intercept)      66.9151817 10.70603759  6.250229 1.906051e-07
## Agriculture      -0.1721140  0.07030392 -2.448142 1.872715e-02
## Examination      -0.2580082  0.25387820 -1.016268 3.154617e-01
## Education        -0.8709401  0.18302860 -4.758492 2.430605e-05
## Catholic          0.1041153  0.03525785  2.952969 5.190079e-03
## Infant.Mortality  1.0770481  0.38171965  2.821568 7.335715e-03

The bootstrap approach does not rely on any of these assumptions made by the linear model and so it is likely giving a more accurate estimate on the coefficients standard errors than tis the summary() function.

Model Selection

Best Subsets Regression

library(tidyverse)
library(caret)
library(leaps)

Computing best subset regression

The R function regsubsets() [leaps package] can be used to identify different best models of different sizes.

You need to specify the option nvmax, which represents the maximum number of predictors to incorporate in the mode. For example, if nvmax = 5, the function will return up to the best 5-variables model, that is, it returns the best 1-varable model, the best 2-variables model, .., the best 5-variables models.

In our example, we have only 5 predictor variables in the data. So, we’ll use nvmax = 5

models.regsubsets <- regsubsets(Fertility~., data= swiss, nvmax = 5)
summary(models.regsubsets)
## Subset selection object
## Call: regsubsets.formula(Fertility ~ ., data = swiss, nvmax = 5)
## 5 Variables  (and intercept)
##                  Forced in Forced out
## Agriculture          FALSE      FALSE
## Examination          FALSE      FALSE
## Education            FALSE      FALSE
## Catholic             FALSE      FALSE
## Infant.Mortality     FALSE      FALSE
## 1 subsets of each size up to 5
## Selection Algorithm: exhaustive
##          Agriculture Examination Education Catholic Infant.Mortality
## 1  ( 1 ) " "         " "         "*"       " "      " "             
## 2  ( 1 ) " "         " "         "*"       "*"      " "             
## 3  ( 1 ) " "         " "         "*"       "*"      "*"             
## 4  ( 1 ) "*"         " "         "*"       "*"      "*"             
## 5  ( 1 ) "*"         "*"         "*"       "*"      "*"

Model selection criteria: Adjusted R2, Cp and BIC

The summary() function returns some metrics - Adjusted R2, Cp and BIC allowing us to identify the best overall model, where best is defined as the model that maximize the adjusted R2 and minimize the prediction errors

The adjusted R2 represents the proportion of variation, in the outcome, that are explained by variation in predictors values. The higher the adjusted R2, the better the model.

The best model, according to each of these metrics, can be extracted as follow:

res.sum <- summary(models.regsubsets)

data.frame(
  Adj.R2 = which.max(res.sum$adjr2),
  CP = which.min(res.sum$cp),
  BIC = which.min(res.sum$bic)
)

There is no single correct solution to model selection, each of these criteria will lead to slightly different models.

Remembert that,

“all models are wrong, some models are useful”

So, we have different “best” models depending on which metrics we consider. We need additonal strategies.

Note also that the adjusted R2, BIC and Cp are calculated on the training data that have been used to fit the model. This means that, the model selection, using these metrics, is possibly subject to overfitting and may not perform as well when applied to new data.

A more rigorous approach is to select a models based on the prediction error computed on a new test data usin k-fold cross-validation techniques

K-fold cross-validation

Here, we’ll follow the procedure below:

  1. Extract the different model formulas from the models object
  2. Train a linear model on the formula using k-fold cross-validation(with k=5) and compute the prediction error of each model

We start by defining two helper function:

  1. get_model_formula(), allowing to access easily the formula of the models returned by the function regsubsets().
#id: model id
#object: regsubsets object
#data: data used to fit regsubsets

get_model_formula <- function(id, object){
  models <- summary(object)$which[id,-1]
  #get outcome variable
  
  form <- as.formula(object$call[[2]])
  outcome <- all.vars(form)[1]
  #Get model predictors
  predictors <- names(which(models == TRUE))
  predictors <- paste(predictors, collapse = "+")
  # Build model formula
  as.formula(paste(outcome, "~", predictors))
}
get_model_formula(3,models.regsubsets)
## Fertility ~ Education + Catholic + Infant.Mortality
## <environment: 0x7ff6b7458560>
  1. get_cv_error(), to get the cross-validation (CV) error for a given model:
get_cv_error <- function (model.formula, data){
  set.seed(1)
   train.control <- trainControl(method = "cv", number = 5)
   cv <- train(model.formula, data = data, method = "lm", trControl = train.control)
   cv$results$RMSE
}

Use the above defined method to compute the prediction error

# Compute cross-validation error

model.ids <- 1:5
cv.errors <- map(model.ids, get_model_formula, models.regsubsets) %>%
  map(get_cv_error, data = swiss) %>%
  unlist()
cv.errors
## [1] 9.422610 8.452344 7.926889 7.678508 7.923860
# Select the model that minimize the CV error
which.min(cv.errors)
## [1] 4

It can be seen that the model with 4 variables is the best model. It has the lower prediction error. The regression coefficients of this model can be extracted as follow:

coef(models.regsubsets, 4)
##      (Intercept)      Agriculture        Education         Catholic 
##       62.1013116       -0.1546175       -0.9802638        0.1246664 
## Infant.Mortality 
##        1.0784422

Stepwise Regression

  1. Forward selection: Which starts with no predictors in the model, Iteratively adds the most contributive predictor, and stops when the improvement is no longer statistically significant.

  2. Backwoard selection(or backward elimination): which starts with all predictors in the model (full model), iteratively removes the least contributive predictors, and stops when you have a model where all predictors are statistically significant.

  3. Stepwise selection (or sequential replacement) which is a combination of forward and backward selections. you start with no predictors, then sequentially add the most contributive predictors (like forward selection). After adding each new variable, remove any variables that no longer provide and improvement in the model fit

library(tidyverse)
library(caret)
library(leaps)

Computing setpwise regression

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Fit the full model
full.model <- lm(Fertility ~., data = swiss)

# Stepwise regression model
step.model <- stepAIC(full.model, direction = "both", trace = FALSE)

summary(step.model)
## 
## Call:
## lm(formula = Fertility ~ Agriculture + Education + Catholic + 
##     Infant.Mortality, data = swiss)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6765  -6.0522   0.7514   3.1664  16.1422 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      62.10131    9.60489   6.466 8.49e-08 ***
## Agriculture      -0.15462    0.06819  -2.267  0.02857 *  
## Education        -0.98026    0.14814  -6.617 5.14e-08 ***
## Catholic          0.12467    0.02889   4.315 9.50e-05 ***
## Infant.Mortality  1.07844    0.38187   2.824  0.00722 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.168 on 42 degrees of freedom
## Multiple R-squared:  0.6993, Adjusted R-squared:  0.6707 
## F-statistic: 24.42 on 4 and 42 DF,  p-value: 1.717e-10

Penalized Regression: Ridge, Lasso and Elastic Net

Shrinkage methods

Ridge regression

Ridge regression shrinks the regression coefficients, so that varibles, with minor contribution to the outcome, have their coefficients close to zero.

The shrinkage of the coefficients is achieved by penalizing the regression model with a penalty term called L2-norm, which is the sum of the squared coefficients.

The amount of the penalty can be fine-tuned using a constant called lambda . Selecting a good value for lambda is critical

When lambda = 0, the penalty term has no effect, and ridge regression will produce the calssical least square coefficients. However, as lambda increases to infinite, the impact of the shrinkage penalty grows, and the ridge regression coefficients will get close zero.

Note that, in contrast to the ordinary least square regression, ridge regression is highly affected by the scale of the predictors. Therfore, it is better to standardize (i.e., scale) the predictors before applying the ridge regression, so that all the predictors are on the same scale.

The standardization of a predictor x, can be achieved using the formula x` = x/sd(x), where sd(x) is the stadard deviation of . The consequence of this is that, all standardized predictors will have a standard deviation of one allowing the final fit to not depend on the scale on which the predictors are measured.

One important advantage of the ridge regression, is that it still perfroms will, compared to the ordinary least square method, in a situation where you have a large multivariate data with the number of predictors (p) larger than the number of observations (n).

One disadvantage of the ridge regression is that, it will included all the predictors in the final model, unlike the stepwise regression methods, which will genrally select models that involve a reduced set of variables.

Ridge regression shrinks the coefficients towards zero, but it will not set any of them exactly to zero. The lasso regression is an alternative that overcomes this drawback.

Lasso regression

Lasso stands for Least Absolute Shrinkage and Selection Operator. It shrinks the regression coefficients toward zero by penalizing the regression model with a penalty term called L1-norm, which is the sum of the absolute coefficients.

In the case of lasso regression, the penalty has the effect of forcing some of the coefficient estimates, with a minor contribution to the model, to be ecactly equal to zero. This means that, lasso can be also seen as an alternative to the subset selection methods for performing variable selection in order to reduce the complexity of the model.

As in ridge regression, selecting a good value of lambda for the lasso is critical.

One obvious advantage of lasso regression over ridge regression, is that it produces simpler and more interpretable models that incorporate only a reduced set of the predictors. However, neither ridge regression nor the lasso will universally dominate the orther.

Generally, lasso mght perform better in a situation where some of the predictors have large coefficients, and the remaining predictors have very small coefficients.

Ridge regression will perform better when the outcome is a function of many predictors, all with coefficents of roughly equal size.

Cross-validation methods can be used for identifying which of these two techniques is better on a particular data set.

Elastic Net

Elastic Net produces a regression model that is penalized with both the L1-norm and L2-norm. The consequence of this is to effectively shrink coefficents (Like in ridge regression) and to set some coefficients to zero (as in LASSO).

Loading required R packages

library(tidyverse)
library(caret)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loaded glmnet 2.0-18

Preparing the data

We’ll use the Boston data set [in MASS package], for predicting the median house value(mdev), in Boston Suburbs, based on multiple predictor variables.

# Load the data
data("Boston", package = "MASS")

# Split the data into training and test set
set.seed(123)
boston.samples <- Boston$medv %>%
  createDataPartition(p=0.8, list = FALSE)

train.boston.data <- Boston[boston.samples, ]
test.boston.data <- Boston[-boston.samples, ]

Computing penalized linear regression

You need to create two objects:

  • y for storing the outcome variable
  • x for holding the predictor variables.
x <- model.matrix(medv~., train.boston.data)[,-1]
# Outcome variable
y <- train.boston.data$medv

We’ll use the R function glmnet() fro computing penalized linear regression models.

glmnet(x,y, alpha = 1, lambda = NULL)
## 
## Call:  glmnet(x = x, y = y, alpha = 1, lambda = NULL) 
## 
##       Df    %Dev   Lambda
##  [1,]  0 0.00000 6.908000
##  [2,]  1 0.09343 6.294000
##  [3,]  2 0.18670 5.735000
##  [4,]  2 0.26620 5.225000
##  [5,]  2 0.33220 4.761000
##  [6,]  2 0.38700 4.338000
##  [7,]  2 0.43240 3.953000
##  [8,]  2 0.47020 3.602000
##  [9,]  2 0.50150 3.282000
## [10,]  3 0.53390 2.990000
## [11,]  3 0.56100 2.725000
## [12,]  3 0.58350 2.482000
## [13,]  3 0.60210 2.262000
## [14,]  3 0.61770 2.061000
## [15,]  3 0.63050 1.878000
## [16,]  3 0.64120 1.711000
## [17,]  3 0.65010 1.559000
## [18,]  3 0.65740 1.421000
## [19,]  3 0.66360 1.294000
## [20,]  4 0.66990 1.179000
## [21,]  4 0.67560 1.075000
## [22,]  4 0.68030 0.979100
## [23,]  4 0.68420 0.892200
## [24,]  4 0.68750 0.812900
## [25,]  5 0.69080 0.740700
## [26,]  5 0.69380 0.674900
## [27,]  6 0.69730 0.614900
## [28,]  7 0.70200 0.560300
## [29,]  7 0.70610 0.510500
## [30,]  8 0.70960 0.465200
## [31,]  8 0.71430 0.423800
## [32,]  8 0.71820 0.386200
## [33,]  8 0.72140 0.351900
## [34,] 10 0.72520 0.320600
## [35,] 11 0.72850 0.292100
## [36,] 11 0.73120 0.266200
## [37,] 11 0.73350 0.242500
## [38,] 11 0.73540 0.221000
## [39,] 11 0.73690 0.201400
## [40,] 11 0.73820 0.183500
## [41,] 12 0.73950 0.167200
## [42,] 12 0.74220 0.152300
## [43,] 12 0.74440 0.138800
## [44,] 12 0.74620 0.126500
## [45,] 12 0.74760 0.115200
## [46,] 12 0.74880 0.105000
## [47,] 12 0.74990 0.095660
## [48,] 12 0.75070 0.087160
## [49,] 12 0.75140 0.079420
## [50,] 12 0.75200 0.072370
## [51,] 12 0.75250 0.065940
## [52,] 12 0.75290 0.060080
## [53,] 12 0.75320 0.054740
## [54,] 12 0.75350 0.049880
## [55,] 12 0.75370 0.045450
## [56,] 12 0.75390 0.041410
## [57,] 12 0.75410 0.037730
## [58,] 12 0.75420 0.034380
## [59,] 12 0.75430 0.031330
## [60,] 12 0.75440 0.028540
## [61,] 12 0.75450 0.026010
## [62,] 12 0.75460 0.023700
## [63,] 12 0.75460 0.021590
## [64,] 12 0.75460 0.019670
## [65,] 12 0.75470 0.017930
## [66,] 12 0.75470 0.016330
## [67,] 12 0.75470 0.014880
## [68,] 12 0.75480 0.013560
## [69,] 12 0.75480 0.012360
## [70,] 12 0.75480 0.011260
## [71,] 12 0.75480 0.010260
## [72,] 12 0.75480 0.009346
## [73,] 12 0.75480 0.008516
## [74,] 12 0.75480 0.007760
  • x: matrix of predictor variables
  • y: the response or outcome variable, which is a binary variable.
  • alpha: the elasticnet mixing parameter. Allowed values include:
    • “1”: for lasso regression
    • “0”: for ridge regression
    • a valued between 0 and 1 (say 0.25) for elastic net regression.
    • lambda: a numeric value defining the amount of shrinkage. Should be specify by analyst

In penalized regression, you need to specify a constant lambda to adjust the amount of the coefficent shrinkage. The best lambda for your data, can be defined as the lambda that minimize the cross-validation prediction error rate. This can be determined automatically using the function cv.glmnet().

In the following section, we start by computing ridge, lasso and elastic net regresion models. Next, we’ll compare the different models in order to choose the best one for our data The best model is defined as the model that has the lowest prediction error, RMSE

Computing ridge regression

# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x,y, alpha = 0)

# Display the best lambda value
cv$lambda.min
## [1] 0.6907672
# Fit the final model on the training data
model.ridge <- glmnet(x,y, alpha = 0, lambda = cv$lambda.min)

# Display regression coefficients
coef(model.ridge)
## 14 x 1 sparse Matrix of class "dgCMatrix"
##                        s0
## (Intercept)  29.172942853
## crim         -0.073961766
## zn            0.035006472
## indus        -0.055702961
## chas          2.485565613
## nox         -11.449222575
## rm            3.976849313
## age          -0.002944308
## dis          -1.221350102
## rad           0.147920742
## tax          -0.006358355
## ptratio      -0.869860292
## black         0.009399874
## lstat        -0.483051940
# Make predictions on the test data
x.test <- model.matrix(medv~., test.boston.data)[,-1]
predictions.ridge <- model.ridge %>% predict(x.test) %>% as.vector()

# Model performance metrics

data.frame(
  RMSE = RMSE(predictions.ridge, test.boston.data$medv),
  Rsquare = R2(predictions.ridge, test.boston.data$medv)
)

Note that by default, the function glmnet() standardizes variables so that their scales are comparable. However, the coefficients are always returned on the original scale.

Computing lasso regression

The only difference between the R code used for ridge regression is that for lasso regression you need to specify the argument alpha = 1 instead of alpha - 0

# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x,y,alpha = 1)
# Display the best lambda value
cv$lambda.min
## [1] 0.007759554
# Fit the final model on the training data
model <- glmnet(x,y, alpha = 1, lambda = cv$lambda.min)
#Display regression coefficients
coef(model)
## 14 x 1 sparse Matrix of class "dgCMatrix"
##                        s0
## (Intercept)  36.962374270
## crim         -0.092549948
## zn            0.048543171
## indus        -0.008321076
## chas          2.287418592
## nox         -16.832483690
## rm            3.810180182
## age           .          
## dis          -1.598716155
## rad           0.286797839
## tax          -0.012456750
## ptratio      -0.950997301
## black         0.009652895
## lstat        -0.528739166
# Make predictions on the test data
x.laso.test <- model.matrix(medv ~., test.boston.data)[,-1]
predictions.lasso <- model %>% predict(x.test) %>% as.vector()

# Model performance metrics
data.frame(
  RMSE = RMSE(predictions.lasso, test.boston.data$medv),
  Rsquare = R2(predictions.lasso, test.boston.data$medv)
)

Computing elastic net regression

The elastic net regression can be easily computed using the caret workflow, which invokes the glmnet package.

We use caret to automatically select the best tuning parameters alpha and lambda. the caret packages tests a range of possible alpha and lambda values, then selects the best values fro lambda and alpha, resulting to a final modl that is an elastic net modle.

Here, we’ll test the combination of 10 different values for alpha and lambda. This is specified using the option tuneLength.

The best alpha and lambda values are those values that minimize the cross-validation error

# # Build the model using the training set
# set.seed(123)
# model.elastic <- train(
#   medv ~., data = train.boston.data, method = "glmnet",
#   trcontrol = trainControl("cv", number = 5),
#   tuneLength = 10
# )
# 
# # Best tuning parameter
# model.elastic$bestTune

Principal Component and Partial Least Squares Regression

Principal component regression

The principal component regression(PCR) first applies Principal Component Analysis on the data set to summarize the original predictor variables into few new variables also known as principal component (PCs), which are a linear combination of the original data.

These PCs are then used to build the linear regression model. The number of principal components, to incorporate in the model, is chosen by corss-validation (cv). Note that, PCR is suitable when the data set contains highly correlated predictors.

Partial least squares regression

A possible drawback of PCR is that we have no guarantee that the selected principal components are associated with the outcome. Here, the selection of the principal components to incorporate in the model is not supervised by the outcome variable.

An alternative to PCR is the Partial Least Squares (PLS) regression, which identifies new principal components that not only summarizes the original predictors, but also that are related to the outcome. These components are then used to fit the regression model. So compared PCR, PLS uses a dimension reduction strategy that is supervised by the outcome.

Like PCR, PLS is convenient for data with highly-correlated predictors. The number of PCs used in PLS is generally chosen by cross-validation. Predictors and the outcome variables should be generally standardized, to make the variables comparable.

Loading required R packages

  • tidyverse for easy data manipulation and visualization
  • caret for easy machine learning workflow
  • pls, for computing PCR and PLS
library(tidyverse)
library(caret)
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
## 
##     R2
## The following object is masked from 'package:stats':
## 
##     loadings

Preparing the data

We’ll use the boston data set

Computing principal component regression

# Build the model on training set

set.seed(123)

model.pc <- train(
  medv~., data = train.boston.data, method = "pcr",
  scale = TRUE,
  trControl = trainControl("cv", number = 10),
  tuneLength = 10
)

# Plot model RMSE vs different values of components

plot(model.pc)

#Print the best tunning parameter ncomp that 
# minimize the cross-validation error, RMSE
model.pc$bestTune
summary(model.pc$finalModel)
## Data:    X dimension: 407 13 
##  Y dimension: 407 1
## Fit method: svdpc
## Number of components considered: 5
## TRAINING: % variance explained
##           1 comps  2 comps  3 comps  4 comps  5 comps
## X           47.48    58.40    68.00    74.75    80.94
## .outcome    38.10    51.02    64.43    65.24    71.17
# Make predictions
predictions.pc <- model.pc %>% predict(test.boston.data)
# Model performance metrics

data.frame(
  RMSE = caret::RMSE(predictions.pc, test.boston.data$medv),
  Rsquare = caret::R2(predictions.pc, test.boston.data$medv)
)

The plot shows the prediction error made by the model acccording to the number of principal components incorporated in the model.

Our analysis shows that, choosing five principal components (ncomp = 5) gives the smallest prediction error RMSE.

The summary() function also provides the percentage of variance explained in the predictors(x) and in the outcome (medv) using different numbers of components.

For example, 80.94% of the variation (or information) contained in the predictors are captured by 5 components (ncomp = 5). Additionally, setting ncomp = 5, captures 71% of the information in the outcome variable (medv), which is good

Taken together, cross-validation identifies ncomp - 5 as the optimal number of PCs that minimize the prediction error (RMSE) and explains enough variation in the predictors and in the outcome

Computing partial least squares

# Build the model on training set
set.seed(123)
model.pls <- train(
  medv~., data = train.boston.data, method = "pls",
  scale = TRUE,
  trControl = trainControl("cv", number = 10),
  tuneLength = 10
)

# Plot model RMSE vs Different values of components
plot(model.pls)

model.pls$bestTune
summary(model.pls$finalModel)
## Data:    X dimension: 407 13 
##  Y dimension: 407 1
## Fit method: oscorespls
## Number of components considered: 9
## TRAINING: % variance explained
##           1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X           46.19    57.32    64.15    69.76    75.63    78.66    82.85
## .outcome    50.90    71.84    73.71    74.71    75.18    75.35    75.42
##           8 comps  9 comps
## X           85.92    90.36
## .outcome    75.48    75.49
#Make predictions
predictions.pls <- model.pls %>% predict(test.boston.data)

# Model performance metrics

data.frame(
  RMSE = caret::RMSE(predictions.pls, test.boston.data$medv),
  Rsquare = caret::R2(predictions.pls, test.boston.data$medv)
)

The optimal number of principal components included in the PLS model is 9. This captures 90% of the variation in the predictors and 75% of the variation in the outcome variable (medv).

In our example, the cross-validation error RMSE obtained with the PLS model is lower than the RMSE obtained using the PCR method. So, the PLS model is the best model, for explaining our data, compaed to the PCR model.

Classification

In this part, we’ll comver the follwing topics:

Most of the classification algorithms computes the probability of belonging to a given class.

Observations are then assigned to the class that have the highest probability score.

Generaly, you need to decide a probability cutoff above which you consider the an observation as belonging to a given class.

Dataset will be using

  1. PimaIndiansDiabetes2 data set

The Pima Indian Diabetes data set is available in the mlbench package. It will be used for binary classification.

# Load the data set
data("PimaIndiansDiabetes2", package = "mlbench")

# Inspect the data
head(PimaIndiansDiabetes2,4)
str(PimaIndiansDiabetes2)
## 'data.frame':    768 obs. of  9 variables:
##  $ pregnant: num  6 1 8 1 0 5 3 10 2 8 ...
##  $ glucose : num  148 85 183 89 137 116 78 115 197 125 ...
##  $ pressure: num  72 66 64 66 40 74 50 NA 70 96 ...
##  $ triceps : num  35 29 NA 23 35 NA 32 NA 45 NA ...
##  $ insulin : num  NA NA NA 94 168 NA 88 NA 543 NA ...
##  $ mass    : num  33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 NA ...
##  $ pedigree: num  0.627 0.351 0.672 0.167 2.288 ...
##  $ age     : num  50 31 32 21 33 30 26 29 53 54 ...
##  $ diabetes: Factor w/ 2 levels "neg","pos": 2 1 2 1 2 1 2 1 2 2 ...

The data contains 768 individuals (female) and 9 clinical variables for predictin the probability of individuals in being diabete-positive or negative:

Column Names Description
pregnant Number of times pregnant
glucose Plasma glucose concentration (glucose tolerance test)
pressure Diastolic blood pressure (mm Hg)
triceps Triceps skin fold thickness (mm)
insulin 2-Hour serum insulin (mu U/ml)
mass Body mass index (weight in kg/(height in m)^2)
pedigree Diabetes pedigree function
age Age (years)
diabetes Class variable (test for diabetes)

Performing the following steps might improve the accuracy of your model

set.seed(123)

training.Pima.samples <- PimaIndiansDiabetes2$diabetes %>%
  createDataPartition(p = 0.8, list = FALSE)

train.pima.data <- PimaIndiansDiabetes2[training.Pima.samples, ]
test.pima.data <- PimaIndiansDiabetes2[-training.Pima.samples, ]

Computing logistic regression

# Fit the model
model.lr.pima <- glm(diabetes~., data = train.pima.data, family = binomial)

# Summarize the model
summary(model.lr.pima)
## 
## Call:
## glm(formula = diabetes ~ ., family = binomial, data = train.pima.data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6751  -0.6666  -0.3588   0.6581   2.5650  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -10.196902   1.369247  -7.447 9.54e-14 ***
## pregnant      0.082032   0.061219   1.340  0.18025    
## glucose       0.036517   0.006381   5.723 1.05e-08 ***
## pressure      0.001333   0.013053   0.102  0.91863    
## triceps       0.008425   0.018649   0.452  0.65145    
## insulin      -0.001219   0.001441  -0.845  0.39784    
## mass          0.081434   0.030448   2.675  0.00748 ** 
## pedigree      1.186528   0.470790   2.520  0.01173 *  
## age           0.030886   0.020337   1.519  0.12884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 402.38  on 314  degrees of freedom
## Residual deviance: 280.83  on 306  degrees of freedom
##   (300 observations deleted due to missingness)
## AIC: 298.83
## 
## Number of Fisher Scoring iterations: 5
## Make predictions

probabilities <- model.lr.pima %>% predict(test.pima.data, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, "pos", "neg") 
# predicted.classes
# Model accuracy
mean(predicted.classes == test.pima.data$diabetes)
## [1] NA

Simple logistic regression

The simple logistic regression is used to predict the probability of class membership based on one single predictor variable.

The following R code builds a model to predict the probability of being diabetes-positive based on the plasma glucose concentration:

model.logit.simple <- glm(diabetes ~ glucose, data = train.pima.data, family = binomial)
summary(model.logit.simple)
## 
## Call:
## glm(formula = diabetes ~ glucose, family = binomial, data = train.pima.data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1322  -0.7882  -0.5157   0.8641   2.2706  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.555585   0.482107  -11.52   <2e-16 ***
## glucose      0.039188   0.003697   10.60   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 790.12  on 610  degrees of freedom
## Residual deviance: 637.56  on 609  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 641.56
## 
## Number of Fisher Scoring iterations: 4

The output above shows the estimate of the regression beta coefficients and their significance levels. The intercept (b0) is -5.55 and the coefficient of glucose variable is 0.039.

The logistic equation can be written as p = exp(-5.55 + 0.039*glucose)/[1+exp(-5.55+0.039*glucose)]. Using this formula, for each new glucose plasma concentration value, you can predict the probability of the individuals in bein diabetes positive.

Predictions can be easily made using the function predict(). Use the option type = “response” to directly obtain the probabilities

newdata <- data.frame(glucose = c(20,190))
probabilities <- model.logit.simple  %>% predict(newdata, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, "pos", "neg")
predicted.classes
##     1     2 
## "neg" "pos"
train.pima.data %>%
  mutate(prob = ifelse(diabetes == "pos", 1, 0)) %>%
  ggplot(aes(glucose,prob)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"))+
  labs(
    title = "Simple Logistic Regression Model",
    x = "Plasma Glucose Concentration",
    y = "Probability of being diabetes-pos"
  )
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).

plot(model.logit.simple)

Stepwise Logistic Regression

Stepwise logistic regression consists of automatically selecting a reduced number of predictor variables for building the best performing logistic regression model.

Data set: PimaIndiansDiabetes2

Quick start R code

library(MASS)
# Fit the model

removed.missing.data <- na.omit(train.pima.data)
model <- glm(diabetes ~ ., data = removed.missing.data , family = binomial) %>% stepAIC(trace = FALSE)

summary(model)
## 
## Call:
## glm(formula = diabetes ~ glucose + mass + pedigree + age, family = binomial, 
##     data = removed.missing.data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8024  -0.6631  -0.3716   0.6617   2.5631  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -10.081719   1.202909  -8.381  < 2e-16 ***
## glucose       0.033770   0.005536   6.101 1.06e-09 ***
## mass          0.083808   0.022726   3.688 0.000226 ***
## pedigree      1.110791   0.456960   2.431 0.015064 *  
## age           0.051063   0.014602   3.497 0.000471 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 402.38  on 314  degrees of freedom
## Residual deviance: 283.63  on 310  degrees of freedom
## AIC: 293.63
## 
## Number of Fisher Scoring iterations: 5
removed.missing.data.from.test <- na.omit(test.pima.data)

probabilities.step <- model %>% predict(removed.missing.data.from.test, type = "response")
predicted.classes.step.logit <- ifelse(probabilities.step > 0.5, "pos", "neg")

#Model Accuracy
mean(predicted.classes.step.logit == removed.missing.data.from.test$diabetes)
## [1] 0.7922078
df <- data.frame("Predicted" = predicted.classes.step.logit)
(df$Predicted == test.pima.data$diabetes)
## Warning in `==.default`(df$Predicted, test.pima.data$diabetes): longer
## object length is not a multiple of shorter object length
## Warning in is.na(e1) | is.na(e2): longer object length is not a multiple of
## shorter object length
##   [1]  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
##  [12] FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [23]  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [34] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE
##  [45] FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
##  [56]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE
##  [67]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
##  [78]  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE
##  [89]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [100]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE
## [111]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
## [122] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
## [133] FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [144] FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE

Penalized logistic regression

Penalized logistic regression imposes a penalty to the logistic model for having too many variables. This results in shrinking the coefficients of the less contributive variables toward zero. This is also know as regularization. The most commonly used penalized regression include

  • Ridge regression: variables with minor contribution have their coefficients close to zero. However, all the variables are incorporated in the model. This is useful when all variables need to be incorporated in the model according to domain knowledge.

  • lasso regression: the coefficients of some less contributive variables aer forced to be exactly zero. Only the most significant variables ae kept in the final model.

  • elastic net regression: the combination of ridge and lasso regression. It shrinks some coefficients toward zero (like ridge regerssion) and set some coefficients to exactly zero (like lasso regression)

Required packages

library(tidyverse)
library(caret)
library(glmnet)

Preparing the data

Data set: PimaIndiansDiabetes2

# Load the data and remove NAs
Pima.Indians.data <- na.omit(PimaIndiansDiabetes2)
# Inspect the data
sample_n(Pima.Indians.data, 3)
# Split the data into training and test set
set.seed(123)

training.samples <- Pima.Indians.data$diabetes %>%
  createDataPartition(p = 0.8, list = FALSE)

train.pima.indians <- Pima.Indians.data[training.samples, ]
test.pima.indians <- Pima.Indians.data[-training.samples, ]

Additional data preparation

The R function model.matrix() help to create the matrix of predictors and also automatically converts categorical predictors to appropriate dummy variables, which is required for the glmnet() function.

# Dummy code categorical predictor variables
x <- model.matrix(diabetes ~., train.pima.indians)[,-1]

# Convert the outcome (class) to a numerical variable
y <- ifelse(train.pima.indians$diabetes == "pos", 1, 0)

We’ll use the R function glmnet() for computing penalized logistic regression.

The simplified format is as follow:

library(glmnet)
glmnet(x, y, family = "binomial", alpha = 1, lambda = NULL)
## 
## Call:  glmnet(x = x, y = y, family = "binomial", alpha = 1, lambda = NULL) 
## 
##       Df      %Dev    Lambda
##  [1,]  0 3.147e-15 0.2480000
##  [2,]  1 3.690e-02 0.2259000
##  [3,]  1 6.743e-02 0.2059000
##  [4,]  1 9.292e-02 0.1876000
##  [5,]  1 1.144e-01 0.1709000
##  [6,]  1 1.325e-01 0.1557000
##  [7,]  1 1.479e-01 0.1419000
##  [8,]  1 1.610e-01 0.1293000
##  [9,]  1 1.722e-01 0.1178000
## [10,]  2 1.851e-01 0.1073000
## [11,]  2 1.970e-01 0.0978000
## [12,]  2 2.071e-01 0.0891100
## [13,]  2 2.158e-01 0.0812000
## [14,]  2 2.233e-01 0.0739800
## [15,]  3 2.314e-01 0.0674100
## [16,]  5 2.400e-01 0.0614200
## [17,]  5 2.484e-01 0.0559600
## [18,]  5 2.558e-01 0.0509900
## [19,]  5 2.620e-01 0.0464600
## [20,]  5 2.674e-01 0.0423400
## [21,]  5 2.721e-01 0.0385700
## [22,]  6 2.762e-01 0.0351500
## [23,]  6 2.798e-01 0.0320300
## [24,]  6 2.829e-01 0.0291800
## [25,]  6 2.856e-01 0.0265900
## [26,]  6 2.879e-01 0.0242300
## [27,]  6 2.898e-01 0.0220700
## [28,]  6 2.915e-01 0.0201100
## [29,]  6 2.929e-01 0.0183300
## [30,]  6 2.941e-01 0.0167000
## [31,]  6 2.951e-01 0.0152100
## [32,]  6 2.959e-01 0.0138600
## [33,]  6 2.967e-01 0.0126300
## [34,]  7 2.975e-01 0.0115100
## [35,]  7 2.984e-01 0.0104900
## [36,]  7 2.993e-01 0.0095550
## [37,]  7 2.999e-01 0.0087060
## [38,]  7 3.005e-01 0.0079330
## [39,]  7 3.010e-01 0.0072280
## [40,]  7 3.014e-01 0.0065860
## [41,]  7 3.018e-01 0.0060010
## [42,]  8 3.022e-01 0.0054680
## [43,]  8 3.025e-01 0.0049820
## [44,]  8 3.028e-01 0.0045390
## [45,]  8 3.030e-01 0.0041360
## [46,]  8 3.032e-01 0.0037690
## [47,]  8 3.034e-01 0.0034340
## [48,]  8 3.035e-01 0.0031290
## [49,]  8 3.037e-01 0.0028510
## [50,]  8 3.038e-01 0.0025980
## [51,]  8 3.038e-01 0.0023670
## [52,]  8 3.039e-01 0.0021570
## [53,]  8 3.040e-01 0.0019650
## [54,]  8 3.040e-01 0.0017900
## [55,]  8 3.040e-01 0.0016310
## [56,]  8 3.041e-01 0.0014860
## [57,]  8 3.041e-01 0.0013540
## [58,]  8 3.041e-01 0.0012340
## [59,]  8 3.042e-01 0.0011240
## [60,]  8 3.042e-01 0.0010250
## [61,]  8 3.042e-01 0.0009336
## [62,]  8 3.042e-01 0.0008506
## [63,]  8 3.042e-01 0.0007750
set.seed(123)
cv.lasso <- cv.glmnet(x,y, alpha = 1, family = "binomial")
# cv.lasso
# summary(cv.lasso)
# Fit the final model on the training data
model.logit.lasso <- glmnet(x,y, alpha = 1, family = "binomial", lambda = cv.lasso$lambda.min)

# Display regression coefficients
coef(model.logit.lasso)
## 9 x 1 sparse Matrix of class "dgCMatrix"
##                        s0
## (Intercept) -8.6158632778
## pregnant     0.0350263984
## glucose      0.0369158677
## pressure     .           
## triceps      0.0164757684
## insulin     -0.0003928031
## mass         0.0304940618
## pedigree     0.7854910473
## age          0.0362782651
# Make predictions on the test data
x.test <- model.matrix(diabetes~., test.pima.indians)[,-1]
probabilities.logit.lasso <- model.logit.lasso %>% predict(newx = x.test)
predict.classess.logit.lasso <- ifelse(probabilities.logit.lasso > 0.5, "pos", "neg")

#Model accuracy
observed.classes <- test.pima.indians$diabetes
mean(predict.classess.logit.lasso == observed.classes)
## [1] 0.7692308
plot(cv.lasso)

The plot displays the cross-validation error according to the log of lambda. The left dashed vertical line indicates that the log of the optimal value of lambda is approximately -5, which is the one that minimizes the prediction error. This lambda value will give the most accurate model. The exact value of lambda can be viewed as follow:

cv.lasso$lambda.min
## [1] 0.008706319
cv.lasso$lambda.1se
## [1] 0.06740987

Using lambda.min as the best lambda, gives the following regression coefficients:

coef(cv.lasso, cv.lasso$lambda.min)
## 9 x 1 sparse Matrix of class "dgCMatrix"
##                         1
## (Intercept) -8.6156146833
## pregnant     0.0350758913
## glucose      0.0369156360
## pressure     .           
## triceps      0.0164842605
## insulin     -0.0003924262
## mass         0.0304848446
## pedigree     0.7855063693
## age          0.0362650459
coef(cv.lasso, cv.lasso$lambda.1se)
## 9 x 1 sparse Matrix of class "dgCMatrix"
##                        1
## (Intercept) -4.657500725
## pregnant     .          
## glucose      0.026284471
## pressure     .          
## triceps      0.001905497
## insulin      .          
## mass         .          
## pedigree     .          
## age          0.017338402

Logistic Regression Assumptions and Diagnostics

The logistic regression method assumes that:

  • The outcome is a binary or dichotomous variable like yes vs no, positive vs negative, 1 vs 0.

  • there is alineare relationship between the logit of the outcome and each predictor variables. Recall that the outcome and each predictor variables. Recall that the logit function is logit(p) = log(p/(1-p)), where p is the probabilities of the outcome

  • There is no influential value (extreame values or outliers) in the continuous predictors

  • There is no high intercorrelation (i.e. multicollinearity) among the predictors.

Loading reequired R packages

library(tidyverse)
library(broom)
theme_set(theme_classic())
PimaIndiansDiabetes2.cleaned <- na.omit(PimaIndiansDiabetes2)
# Fit the logistic regression model
model.pima.logit2 <- glm(diabetes~., data = PimaIndiansDiabetes2.cleaned, family = binomial)
# Predict the probability (p) of diiabete positivity
probabilities.pima.logit <- predict(model.pima.logit2, type = "response")
predicted.pima.classes <- ifelse(probabilities.pima.logit > 0.5, "pos", "neg")
head(predicted.pima.classes)
##     4     5     7     9    14    15 
## "neg" "pos" "neg" "pos" "pos" "pos"
# train.pima.indians
mydata <- PimaIndiansDiabetes2.cleaned  %>%
  dplyr::select_if(is.numeric)
predictors <- colnames(mydata)
# Bind the logit and tidying the data for plot
model.pima.logit <- mydata %>%
  mutate(logit = log(probabilities.pima.logit/(1-probabilities.pima.logit))) %>%
  gather(key = "predictors", value = "predictor.value", -logit)
ggplot(model.pima.logit, aes(logit, predictor.value)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") +
  theme_bw() +
  facet_wrap(~predictors, scales = "free_y")

Influential values

Influential values are extreme individual data points that can alter the quaity of the logistic regression model. The most extreme values in the data can be examined by visualizing the Cook’s distance values.

Here we label the top 3 largest values:

plot(model.pima.logit2, which = 4, id.n = 3)

Note that, not all outliers are influential observations. To check kwhether the data contains potential influential observations, the standardized residual error can be inspected. Data points with an absolute standardized residuals above 3 represent possible outliers and may deserve closer attention.

The following R code computes the standardized residuals (.std.resid) and the Cook’s distance (.cooksd) using the R function augment()[broom package].

# Extract model results

model.pima.logit.data <- augment(model.pima.logit2) %>%
  mutate(index = 1:n())

The data for the top 3 largest values, according to the Cook’s distance, can be displayed as follow:

model.pima.logit.data %>% top_n(3, .cooksd)

Plot the standardized residuals:

ggplot(model.pima.logit.data, aes(index, .std.resid)) +
  geom_point(aes(color = diabetes), alpha = 0.5) +
  theme_bw()

Filter potential influential data points with abs(.std.res) > 3

model.pima.logit.data %>% filter(abs(.std.resid) > 3)

There is no influential observation in our data.

When we have outliers in a continuous predictor, potential solutions include:

  • Removing the concerned records
  • Transform the data into log scale
  • Use non parameteric methods

Multicollinearity

Multicollinearity corresponds to a situation where the data contain highly correlated predictor variables.

Multicollinearity is an important issue in regression analysis and should be fixed by removing the concerned variables. It can be assessed using the R function vif()

car::vif(model.pima.logit2)
## pregnant  glucose pressure  triceps  insulin     mass pedigree      age 
## 1.892387 1.378937 1.191287 1.638865 1.384038 1.832416 1.031715 1.974053

As a rule of thumb, a VIF value that exceeds 5 or 10 indicates a problematic amount of collinearity. In our example there is no collinearity: all variables have a value of VIF well below 5.

Multinomial logistic regression

The multinomial logistic regressin is an extension of the logistic regression for multclass classifcation tasks. It is used when the outcome involves more than two classes.

Loading required R packages

library(tidyverse)
library(caret)
library(nnet)
## 
## Attaching package: 'nnet'
## The following object is masked from 'package:mgcv':
## 
##     multinom

Preparing the data

We’ll use the iris data set

# Load the data
data("iris")
# Inspect the data
sample_n(iris,3)
# Split the data into training and test set
set.seed(123)
training.samples.iris <- iris$Species %>% createDataPartition(p=0.8, list = FALSE)
train.iris.data <- iris[training.samples.iris,]
test.iris.data <- iris[-training.samples.iris,]

Computing multinomial logistic regression

# Fit the model
model <- nnet::multinom(Species ~., data = train.iris.data)
## # weights:  18 (10 variable)
## initial  value 131.833475 
## iter  10 value 13.707358
## iter  20 value 5.548255
## iter  30 value 5.196272
## iter  40 value 4.985881
## iter  50 value 4.978698
## iter  60 value 4.970034
## iter  70 value 4.968625
## iter  80 value 4.967145
## iter  90 value 4.967125
## iter 100 value 4.967097
## final  value 4.967097 
## stopped after 100 iterations
# Summarize the model
summary(model)
## Call:
## nnet::multinom(formula = Species ~ ., data = train.iris.data)
## 
## Coefficients:
##            (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor    17.29267    -5.643165   -8.391525     14.61331   -2.091772
## virginica    -21.99694    -6.650591  -14.538985     22.01169   14.158731
## 
## Std. Errors:
##            (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor    43.11205     119.7240    198.5332     91.66177    59.26249
## virginica     43.68199     119.7252    198.5908     91.81816    59.59031
## 
## Residual Deviance: 9.934194 
## AIC: 29.93419
# Make predictions

predicted.species <- model %>% predict(test.iris.data)
head(predicted.species)
## [1] setosa setosa setosa setosa setosa setosa
## Levels: setosa versicolor virginica
# Model accuracy
mean(predicted.species == test.iris.data$Species)
## [1] 0.9666667

Discriminant Analysis

The following discriminant analysis methods will be described:

  • Linear discriminant analysis (LDA): Uses linear combinations of predictors to predict the class of a given observation. Assumes that the predictor variables (p) are normally distributed and the classes have identical variances (for univariate analysis, p = 1) or identical covariance matrices (for multvariate analysis, p > 1).

  • Quadratic discriminant analysis (QDA): More flexible than LDA. Here, there is no assumption that the covariance matrix of classes is the same

  • Mixture discriminant analysis (MDA): Each class is assumed to be a Gaussian mixture of subclasses.

  • Flexible Discriminant Analysis (FDA): Non-linear combination of predictors is used such as splines.

  • Regularized discriminant analysis (RDA): Regularization (or shrinkage) improves the estimate of the covariance matrices in situation where the number of predictors is larger than the number of samples in the training data. This leads to an improvement of the discriminant analysis.

Loading required R packages

library(tidyverse)
library(caret)
theme_set(theme_classic())

Preparing the data

  1. Split the data
# Lodd the data
data("iris")
# Split the data into training and test set
set.seed(123)
training.samples.iris <- iris$Species %>%
  createDataPartition(p=0.8, list = FALSE)

train.data.iris <- iris[training.samples.iris, ]
test.data.iris <- iris[-training.samples.iris, ]
  1. Normalize the data. Categorical
# Estimate preprocessing parameters
preproc.pram <- train.data.iris %>%
  preProcess(method = c("center", "scale"))

# Transform the data using the estimated parameters
train.transformed <- preproc.pram %>% predict(train.data.iris)
test.transformed <- preproc.pram %>% predict(test.data.iris)

Before performing LDA, consider: * Inspecting the univariate distribution of each variable and make sure that they are normally distribute. If not, you can transform them using log and root for exponential distributions and Box-Cox for skewed distributions.

*removing outliers from your data and standardize the variables to make their scale comparable.

R code

library(MASS)

# Fit the model
model.lda <- lda(Species ~., data = train.transformed)
# Make predictions
predictions <- model.lda %>% predict(test.transformed)

# Model accuracy
mean(predictions$class == test.transformed$Species)
## [1] 1
model.lda
## Call:
## lda(Species ~ ., data = train.transformed)
## 
## Prior probabilities of groups:
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333 
## 
## Group means:
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa       -1.0120728   0.7867793   -1.2927218  -1.2496079
## versicolor    0.1174121  -0.6478157    0.2724253   0.1541511
## virginica     0.8946607  -0.1389636    1.0202965   1.0954568
## 
## Coefficients of linear discriminants:
##                     LD1         LD2
## Sepal.Length  0.9108023  0.03183011
## Sepal.Width   0.6477657  0.89852536
## Petal.Length -4.0816032 -2.22724052
## Petal.Width  -2.3128276  2.65441936
## 
## Proportion of trace:
##    LD1    LD2 
## 0.9905 0.0095
summary(model.lda)
##         Length Class  Mode     
## prior    3     -none- numeric  
## counts   3     -none- numeric  
## means   12     -none- numeric  
## scaling  8     -none- numeric  
## lev      3     -none- character
## svd      2     -none- numeric  
## N        1     -none- numeric  
## call     3     -none- call     
## terms    3     terms  call     
## xlevels  0     -none- list

LDA determines group means and computes, for each individual, the probability of belonging to the different groups. The individual is then affected to the group with the highest probability score.

The lda() outputs contain the following elements: * prior probabilities of groups: the proportion of training observation in each group. For example, there are 33% of the training observations in the setosa group * Group means: group center of gravity. Shows the mean of each variable in each group. * Coefficients of linear discriminants: Shows the linear combination of predictor variables that are used to form the LDA decisionrule. For example

LD1 = 0.01*Sepal.Length + 0.64*Sepal.Width - 4.08*Petal.Length = 2.3*Petal.Width LD2 = 0.03*Sepal.Length + 0.89*Sepal.Width - 2.2*Petal.Length - 2.6*Petal.Width

Using the function Plot() produces plots of the linear discriminants, obtained by computing LD1 and LD2 for each of the training observations

plot(model.lda)

lda.data <- cbind(train.transformed, predict(model.lda)$x)

ggplot(lda.data, aes(LD1, LD2)) +
  geom_point(aes(color = Species))

It can be seen that, our model correctly classified 100% of observations, which is excellent.

Note that, by default, the probability cutoff used to decide group-membership is 0.5

In some situation, you might want to increase the precision of the model. In this case you can fine-tune the model by adjusting the posterior probability cutoff. For example, you can increase or lower the cutoff

Variable selection:

Note that, if the predictor variables are standardized before computing LDA, the discriminator weights can be used as measures of variable importance for feature selection

Quardratic discriminant analysis - QDA

QDA is little bit more flexible than LDA, in the sense that it does not assumes the equality of variance/covariance. In other words, for QDA the covariance matrix can be different for each class.

LDA tends to be a better than QDA then you have a small training set.

In contrast, QDA is recommended if the training set is very large, so that the variance of the classifier is not a major issue, or if the assumption of a common covariance matrix for the K classes if clearly untenable

QDA can be computed using the R function qda() [MASS package]

library(MASS)
#Fit the model
model.qda <- qda(Species~., data = train.transformed)
model.qda
## Call:
## qda(Species ~ ., data = train.transformed)
## 
## Prior probabilities of groups:
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333 
## 
## Group means:
##            Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa       -1.0120728   0.7867793   -1.2927218  -1.2496079
## versicolor    0.1174121  -0.6478157    0.2724253   0.1541511
## virginica     0.8946607  -0.1389636    1.0202965   1.0954568
# Make predictions
predictions.qda <- model.qda %>% predict(test.transformed)

# Model accuracy
mean(predictions.qda$class == test.transformed$Species)
## [1] 1

Mixture discriminant analysis

The LDA classifier assumes that each class comes from a single normal (or Gaussian ) distribution.

This is too restrictive.

For MDA, ther are classes, and each class is assumed to be a Gaussian mixture of subclasses, where each data point has a probability of belonging to each class. Equality of covariance matrix, among classes, is still assumed

library(mda)
## Loading required package: class
## Loaded mda 0.4-10
model.mda <- mda(Species~., data = train.transformed)
model.mda
## Call:
## mda(formula = Species ~ ., data = train.transformed)
## 
## Dimension: 5 
## 
## Percent Between-Group Variance Explained:
##     v1     v2     v3     v4     v5 
##  96.77  99.63  99.86 100.00 100.00 
## 
## Degrees of Freedom (per dimension): 5 
## 
## Training Misclassification Error: 0.025 ( N = 120 )
## 
## Deviance: 14.005
# Make predictions

predicted.classes.mda <- model.mda %>% predict(test.transformed)

# Model accuracy
mean(predicted.classes.mda == test.transformed$Species)
## [1] 1

Flexible discriminant analysis - FDA

FDA is a flexible extension of LDA that uses non-linear combinations of predictors such as splines. FDA is useful to model multivariate non-normality or non-linear relationships among variables within each group, alowing for a more accurate classification

library(mda)
# Fit the model
model.fda <- fda(Species~., data = train.transformed)
model.fda
## Call:
## fda(formula = Species ~ ., data = train.transformed)
## 
## Dimension: 2 
## 
## Percent Between-Group Variance Explained:
##     v1     v2 
##  99.05 100.00 
## 
## Degrees of Freedom (per dimension): 5 
## 
## Training Misclassification Error: 0.025 ( N = 120 )
predicted.classes.fda <- model.fda %>% predict(test.transformed)
#Model accuracy
mean(predicted.classes.fda == test.transformed$Species)
## [1] 1

Regularized discriminant analysis

RDA builds a classification rule by regularizing the group covariance matrices allowing a more robust model against multicollinearity in the data. This might be very useful for a large multivariate data set containing highly correlated predictors.

Regularized discriminant analysis is a kind of a trade-off between LDA and QDA. Recall that, in LDA we assume equality of covariance matrix for all of the classes. QDA assumes different covariance matrices for all the classes. QDa assumes different covariance matrices for all the classes. Regularized discriminant analysis is an intermediate between LDA and QDA.

RDA shrinks the separte covariances of QDA toward a common covariance as in LDA. This improves the estimate of the covariance matrices in situations where the number of predictors is larger thant the number of samples in the training data, potentially leading to an improvement of the model accuracy.

library(klaR)
# Fit the model
model.rda <- rda(Species~., data = train.transformed)

#  Make predictions
predictions.rda <- model.rda %>% predict(test.transformed)

# Model accuracy

mean(predictions.rda$class == test.transformed$Species)
## [1] 1

Naive Bayes Classifier

The Naive Bayes Classifier is a simple and powerful method that can be used for binary and multiclass classification problems.

Naive Bayes classifier predicts the class membership probability of observations using Bayes theorem, which is based on conditional probability, that is the probability of something to happen, given that something else has already occured.

Will use PimaIndiansDiabetes2 data set

Computing Naive Bayes

library(klaR)
# Fit the model
model.naive <- NaiveBayes(diabetes ~., data = train.pima.indians)

model.naive
## $apriori
## grouping
##       neg       pos 
## 0.6687898 0.3312102 
## 
## $tables
## $tables$pregnant
##         [,1]     [,2]
## neg 2.890476 2.645282
## pos 4.471154 3.968215
## 
## $tables$glucose
##         [,1]     [,2]
## neg 112.1381 25.23393
## pos 147.1635 29.39590
## 
## $tables$pressure
##         [,1]     [,2]
## neg 69.19048 12.13173
## pos 73.54808 13.71264
## 
## $tables$triceps
##         [,1]      [,2]
## neg 27.83333 10.668822
## pos 32.68269  9.539152
## 
## $tables$insulin
##         [,1]     [,2]
## neg 137.0571 110.5755
## pos 214.2115 140.5018
## 
## $tables$mass
##         [,1]     [,2]
## neg 32.11524 6.905892
## pos 35.42500 6.825278
## 
## $tables$pedigree
##          [,1]      [,2]
## neg 0.4699952 0.3091545
## pos 0.6139904 0.3853125
## 
## $tables$age
##        [,1]      [,2]
## neg 28.6619  9.085715
## pos 35.8750 10.248076
## 
## 
## $levels
## [1] "neg" "pos"
## 
## $call
## NaiveBayes.default(x = X, grouping = Y)
## 
## $x
##     pregnant glucose pressure triceps insulin mass pedigree age
## 4          1      89       66      23      94 28.1    0.167  21
## 5          0     137       40      35     168 43.1    2.288  33
## 7          3      78       50      32      88 31.0    0.248  26
## 9          2     197       70      45     543 30.5    0.158  53
## 14         1     189       60      23     846 30.1    0.398  59
## 15         5     166       72      19     175 25.8    0.587  51
## 17         0     118       84      47     230 45.8    0.551  31
## 19         1     103       30      38      83 43.3    0.183  33
## 20         1     115       70      30      96 34.6    0.529  32
## 26        10     125       70      26     115 31.1    0.205  41
## 33         3      88       58      11      54 24.8    0.267  22
## 44         9     171      110      24     240 45.4    0.721  54
## 51         1     103       80      11      82 19.4    0.491  22
## 52         1     101       50      15      36 24.2    0.526  26
## 53         5      88       66      21      23 24.4    0.342  30
## 54         8     176       90      34     300 33.7    0.467  58
## 55         7     150       66      42     342 34.7    0.718  42
## 57         7     187       68      39     304 37.7    0.254  41
## 58         0     100       88      60     110 46.8    0.962  31
## 64         2     141       58      34     128 25.4    0.699  24
## 69         1      95       66      13      38 19.6    0.334  25
## 70         4     146       85      27     100 28.9    0.189  27
## 71         2     100       66      20      90 32.9    0.867  28
## 74         4     129       86      20     270 35.1    0.231  23
## 83         7      83       78      26      71 29.3    0.767  36
## 86         2     110       74      29     125 32.4    0.698  27
## 88         2     100       68      25      71 38.5    0.324  26
## 89        15     136       70      32     110 37.1    0.153  43
## 92         4     123       80      15     176 32.0    0.443  34
## 93         7      81       78      40      48 46.7    0.261  42
## 95         2     142       82      18      64 24.7    0.761  21
## 96         6     144       72      27     228 33.9    0.255  40
## 98         1      71       48      18      76 20.4    0.323  22
## 99         6      93       50      30      64 28.7    0.356  23
## 100        1     122       90      51     220 49.7    0.325  31
## 104        1      81       72      18      40 26.6    0.283  24
## 108        4     144       58      28     140 29.5    0.287  37
## 109        3      83       58      31      18 34.3    0.336  25
## 112        8     155       62      26     495 34.0    0.543  46
## 113        1      89       76      34      37 31.2    0.192  23
## 115        7     160       54      32     175 30.5    0.588  39
## 120        4      99       76      15      51 23.2    0.223  21
## 121        0     162       76      56     100 53.2    0.759  25
## 123        2     107       74      30     100 33.6    0.404  23
## 126        1      88       30      42      99 55.0    0.496  26
## 127        3     120       70      30     135 42.9    0.452  30
## 128        1     118       58      36      94 33.3    0.261  23
## 129        1     117       88      24     145 34.5    0.403  40
## 131        4     173       70      14     168 29.7    0.361  33
## 133        3     170       64      37     225 34.5    0.356  30
## 135        2      96       68      13      49 21.1    0.647  26
## 136        2     125       60      20     140 33.8    0.088  31
## 140        5     105       72      29     325 36.9    0.159  28
## 143        2     108       52      26      63 32.5    0.318  22
## 145        4     154       62      31     284 32.8    0.237  23
## 148        2     106       64      35     119 30.5    1.400  34
## 151        1     136       74      50     204 37.4    0.399  24
## 153        9     156       86      28     155 34.3    1.189  42
## 154        1     153       82      42     485 40.6    0.687  23
## 158        1     109       56      21     135 25.2    0.833  23
## 159        2      88       74      19      53 29.0    0.229  22
## 160       17     163       72      41     114 40.9    0.817  47
## 162        7     102       74      40     105 37.2    0.204  45
## 163        0     114       80      34     285 44.2    0.167  27
## 172        6     134       70      23     130 35.4    0.542  29
## 174        1      79       60      42      48 43.5    0.678  23
## 175        2      75       64      24      55 29.7    0.370  33
## 178        0     129      110      46     130 67.1    0.319  26
## 182        0     119       64      18      92 34.9    0.725  23
## 187        8     181       68      36     495 30.1    0.615  60
## 188        1     128       98      41      58 32.0    1.321  33
## 189        8     109       76      39     114 27.9    0.640  31
## 190        5     139       80      35     160 31.6    0.361  25
## 192        9     123       70      44      94 33.1    0.374  40
## 196        5     158       84      41     210 39.4    0.395  29
## 198        3     107       62      13      48 22.9    0.678  23
## 199        4     109       64      44      99 34.8    0.905  26
## 200        4     148       60      27     318 30.9    0.150  29
## 204        2      99       70      16      44 20.4    0.235  27
## 205        6     103       72      32     190 37.7    0.324  55
## 214        0     140       65      26     130 42.6    0.431  24
## 215        9     112       82      32     175 34.2    0.260  36
## 216       12     151       70      40     271 41.8    0.742  38
## 218        6     125       68      30     120 30.0    0.464  32
## 221        0     177       60      29     478 34.6    1.072  21
## 225        1     100       66      15      56 23.6    0.666  26
## 226        1      87       78      27      32 34.6    0.101  22
## 229        4     197       70      39     744 36.7    2.329  31
## 230        0     117       80      31      53 45.2    0.089  24
## 232        6     134       80      37     370 46.2    0.238  46
## 233        1      79       80      25      37 25.4    0.583  22
## 235        3      74       68      28      45 29.7    0.293  23
## 237        7     181       84      21     192 35.9    0.586  51
## 242        4      91       70      32      88 33.1    0.446  22
## 244        6     119       50      22     176 27.1    1.318  33
## 245        2     146       76      35     194 38.2    0.329  29
## 248        0     165       90      33     680 52.3    0.427  23
## 249        9     124       70      33     402 35.4    0.282  34
## 255       12      92       62       7     258 27.6    0.926  44
## 259        1     193       50      16     375 25.9    0.655  24
## 260       11     155       76      28     150 33.3    1.353  51
## 261        3     191       68      15     130 30.9    0.299  34
## 266        5      96       74      18      67 33.6    0.997  43
## 272        2     108       62      32      56 25.2    0.128  21
## 274        1      71       78      50      45 33.2    0.422  21
## 276        2     100       70      52      57 40.5    0.677  25
## 280        2     108       62      10     278 25.3    0.881  22
## 282       10     129       76      28     122 35.9    0.280  39
## 283        7     133       88      15     155 32.4    0.262  37
## 286        7     136       74      26     135 26.0    0.647  51
## 287        5     155       84      44     545 38.7    0.619  34
## 290        5     108       72      43      75 36.1    0.263  33
## 291        0      78       88      29      40 36.9    0.434  21
## 292        0     107       62      30      74 36.6    0.757  25
## 293        2     128       78      37     182 43.3    1.224  31
## 294        1     128       48      45     194 40.5    0.613  24
## 296        6     151       62      31     120 35.5    0.692  28
## 297        2     146       70      38     360 28.0    0.337  29
## 298        0     126       84      29     215 30.7    0.520  24
## 306        2     120       76      37     105 39.7    0.215  29
## 307       10     161       68      23     132 25.5    0.326  47
## 308        0     137       68      14     148 24.8    0.143  21
## 310        2     124       68      28     205 32.9    0.875  30
## 312        0     106       70      37     148 39.4    0.605  22
## 313        2     155       74      17      96 26.6    0.433  27
## 314        3     113       50      10      85 29.5    0.626  25
## 316        2     112       68      22      94 34.1    0.315  26
## 319        3     115       66      39     140 38.1    0.150  28
## 321        4     129       60      12     231 27.5    0.527  31
## 324       13     152       90      33      29 26.8    0.731  43
## 326        1     157       72      21     168 25.6    0.123  24
## 327        1     122       64      32     156 35.1    0.692  30
## 330        6     105       70      32      68 30.8    0.122  37
## 332        2      87       58      16      52 32.7    0.166  25
## 335        1      95       60      18      58 23.9    0.260  22
## 336        0     165       76      43     255 47.9    0.259  26
## 339        9     152       78      34     171 34.2    0.893  33
## 341        1     130       70      13     105 25.9    0.472  22
## 346        8     126       88      36     108 38.5    0.349  49
## 349        3      99       62      19      74 21.8    0.279  26
## 354        1      90       62      12      43 27.2    0.580  24
## 357        1     125       50      40     167 33.3    0.962  28
## 359       12      88       74      40      54 35.3    0.378  48
## 360        1     196       76      36     249 36.5    0.875  29
## 361        5     189       64      33     325 31.2    0.583  29
## 365        4     147       74      25     293 34.9    0.385  30
## 369        3      81       86      16      66 27.5    0.306  22
## 370        1     133      102      28     140 32.8    0.234  45
## 371        3     173       82      48     465 38.4    2.137  25
## 374        2     105       58      40      94 34.9    0.225  25
## 375        2     122       52      43     158 36.2    0.816  28
## 376       12     140       82      43     325 39.2    0.528  58
## 378        1      87       60      37      75 37.2    0.509  22
## 381        1     107       72      30      82 30.8    0.821  24
## 383        1     109       60       8     182 25.4    0.947  21
## 385        1     125       70      24     110 24.3    0.221  25
## 386        1     119       54      13      50 22.3    0.205  24
## 390        3     100       68      23      81 31.6    0.949  28
## 391        1     100       66      29     196 32.0    0.444  42
## 393        1     131       64      14     415 23.7    0.389  21
## 394        4     116       72      12      87 22.1    0.463  37
## 396        2     127       58      24     275 27.7    1.600  25
## 397        3      96       56      34     115 24.7    0.944  39
## 403        5     136       84      41      88 35.0    0.286  35
## 406        2     123       48      32     165 42.1    0.520  26
## 410        1     172       68      49     579 42.4    0.702  28
## 413        1     143       84      23     310 42.4    1.076  22
## 414        1     143       74      22      61 26.2    0.256  21
## 415        0     138       60      35     167 34.6    0.534  21
## 420        3     129       64      29     115 26.4    0.219  28
## 421        1     119       88      41     170 45.3    0.507  26
## 423        0     102       64      46      78 40.6    0.496  21
## 425        8     151       78      32     210 42.9    0.516  36
## 426        4     184       78      39     277 37.0    0.264  31
## 428        1     181       64      30     180 34.1    0.328  38
## 429        0     135       94      46     145 40.6    0.284  26
## 430        1      95       82      25     180 35.0    0.233  43
## 432        3      89       74      16      85 30.4    0.551  38
## 433        1      80       74      11      60 30.0    0.527  22
## 442        2      83       66      23      50 32.2    0.497  22
## 443        4     117       64      27     120 33.2    0.230  24
## 448        0      95       80      45      92 36.5    0.330  26
## 449        0     104       64      37      64 33.6    0.510  22
## 451        1      82       64      13      95 21.2    0.415  23
## 458        5      86       68      28      71 30.2    0.364  24
## 459       10     148       84      48     237 37.6    1.001  51
## 460        9     134       74      33      60 25.9    0.460  81
## 461        9     120       72      22      56 20.8    0.733  48
## 463        8      74       70      40      49 35.3    0.705  39
## 466        0     124       56      13     105 21.8    0.452  21
## 467        0      74       52      10      36 27.8    0.269  22
## 468        0      97       64      36     100 36.8    0.600  25
## 470        6     154       78      41     140 46.1    0.571  27
## 477        2     105       80      45     191 33.7    0.711  29
## 479        8     126       74      38      75 25.9    0.162  39
## 481        3     158       70      30     328 35.5    0.344  35
## 483        4      85       58      22      49 27.8    0.306  28
## 484        0      84       82      31     125 38.2    0.233  23
## 486        0     135       68      42     250 42.3    0.365  24
## 487        1     139       62      41     480 40.7    0.536  21
## 488        0     173       78      32     265 46.5    1.159  58
## 491        2      83       65      28      66 36.8    0.629  24
## 494        4     125       70      18     122 28.9    1.144  45
## 498        2      81       72      15      76 30.1    0.547  25
## 499        7     195       70      33     145 25.1    0.163  55
## 500        6     154       74      32     193 29.3    0.839  39
## 501        2     117       90      19      71 25.2    0.313  21
## 504        7      94       64      25      79 33.3    0.738  41
## 507        0     180       90      26      90 36.5    0.314  35
## 509        2      84       50      23      76 30.4    0.968  21
## 515        3      99       54      19      86 25.6    0.154  24
## 516        3     163       70      18     105 31.6    0.268  28
## 517        9     145       88      34     165 30.3    0.771  53
## 520        6     129       90       7     326 19.6    0.582  60
## 521        2      68       70      32      66 25.0    0.187  25
## 527        1      97       64      19      82 18.2    0.299  21
## 528        3     116       74      15     105 26.3    0.107  24
## 531        2     122       60      18     106 29.8    0.717  22
## 533        1      86       66      52      65 41.3    0.917  29
## 541        8     100       74      40     215 39.4    0.661  43
## 542        3     128       72      25     190 32.4    0.549  27
## 544        4      84       90      23      56 39.5    0.159  25
## 545        1      88       78      29      76 32.0    0.365  29
## 546        8     186       90      35     225 34.5    0.423  37
## 547        5     187       76      27     207 43.6    1.034  53
## 552        3      84       68      30     106 31.9    0.591  25
## 556        7     124       70      33     215 25.5    0.161  37
## 562        0     198       66      32     274 41.3    0.502  28
## 563        1      87       68      34      77 37.6    0.401  24
## 564        6      99       60      19      54 26.9    0.497  32
## 566        2      95       54      14      88 26.1    0.748  22
## 567        1      99       72      30      18 38.6    0.412  21
## 568        6      92       62      32     126 32.0    0.085  46
## 569        4     154       72      29     126 31.3    0.338  37
## 575        1     143       86      30     330 30.1    0.892  23
## 576        1     119       44      47      63 35.5    0.280  25
## 577        6     108       44      20     130 24.0    0.813  35
## 585        8     124       76      24     600 28.7    0.687  52
## 589        3     176       86      27     156 33.3    1.154  52
## 592        2     112       78      50     140 39.4    0.175  24
## 594        2      82       52      22     115 28.5    1.699  25
## 595        6     123       72      45     230 33.6    0.733  34
## 596        0     188       82      14     185 32.0    0.682  22
## 598        1      89       24      19      25 27.8    0.559  21
## 600        1     109       38      18     120 23.1    0.407  26
## 607        1     181       78      42     293 40.0    1.258  22
## 608        1      92       62      25      41 19.5    0.482  25
## 609        0     152       82      39     272 41.5    0.270  27
## 610        1     111       62      13     182 24.0    0.138  23
## 611        3     106       54      21     158 30.9    0.292  24
## 621        2     112       86      42     160 38.4    0.246  28
## 624        0      94       70      27     115 43.5    0.347  21
## 626        4      90       88      47      54 37.7    0.362  29
## 634        1     128       82      17     183 27.5    0.115  22
## 638        2      94       76      18      66 31.6    0.649  23
## 641        0     102       86      17     105 29.3    0.695  27
## 645        3     103       72      30     152 27.6    0.730  27
## 646        2     157       74      35     440 39.4    0.134  30
## 647        1     167       74      17     144 23.4    0.447  33
## 648        0     179       50      36     159 37.8    0.455  22
## 649       11     136       84      35     130 28.3    0.260  42
## 651        1      91       54      25     100 25.2    0.234  23
## 652        1     117       60      23     106 33.8    0.466  27
## 653        5     123       74      40      77 34.1    0.269  28
## 655        1     106       70      28     135 34.2    0.142  22
## 656        2     155       52      27     540 38.7    0.240  25
## 657        2     101       58      35      90 21.8    0.155  22
## 658        1     120       80      48     200 38.9    1.162  41
## 660        3      80       82      31      70 34.2    1.292  27
## 663        8     167      106      46     231 37.6    0.165  43
## 664        9     145       80      46     130 37.9    0.637  40
## 666        1     112       80      45     132 34.8    0.217  24
## 669        6      98       58      33     190 34.0    0.430  43
## 670        9     154       78      30     100 30.9    0.164  45
## 671        6     165       68      26     168 33.6    0.631  49
## 673       10      68      106      23      49 35.5    0.285  47
## 674        3     123      100      35     240 57.3    0.880  22
## 680        2     101       58      17     265 24.2    0.614  23
## 681        2      56       56      28      45 24.2    0.332  22
## 686        2     129       74      26     205 33.2    0.591  25
## 689        1     140       74      26     180 24.1    0.828  23
## 693        2     121       70      32      95 39.1    0.886  23
## 694        7     129       68      49     125 38.5    0.439  43
## 696        7     142       90      24     480 30.4    0.128  43
## 697        3     169       74      19     125 29.9    0.268  31
## 699        4     127       88      11     155 34.5    0.598  28
## 701        2     122       76      27     200 35.9    0.483  26
## 705        4     110       76      20     100 28.4    0.118  27
## 708        2     127       46      21     335 34.4    0.176  22
## 711        3     158       64      13     387 31.2    0.295  24
## 714        0     134       58      20     291 26.4    0.352  21
## 716        7     187       50      33     392 33.9    0.826  34
## 717        3     173       78      39     185 33.8    0.970  31
## 719        1     108       60      46     178 35.5    0.415  24
## 722        1     114       66      36     200 38.1    0.289  21
## 723        1     149       68      29     127 29.3    0.349  42
## 724        5     117       86      30     105 39.1    0.251  42
## 727        1     116       78      29     180 36.1    0.496  25
## 731        3     130       78      23      79 28.4    0.323  34
## 733        2     174       88      37     120 44.5    0.646  24
## 734        2     106       56      27     165 29.0    0.426  22
## 737        0     126       86      27     120 27.4    0.515  21
## 741       11     120       80      37     150 42.3    0.785  48
## 742        3     102       44      20      94 30.8    0.400  26
## 745       13     153       88      37     140 40.6    1.174  39
## 746       12     100       84      33     105 30.0    0.488  46
## 748        1      81       74      41      57 46.3    1.096  32
## 749        3     187       70      22     200 36.4    0.408  36
## 752        1     121       78      39      74 39.0    0.261  28
## 754        0     181       88      44     510 43.3    0.222  26
## 756        1     128       88      39     110 36.5    1.057  37
## 761        2      88       58      26      16 28.4    0.766  22
## 764       10     101       76      48     180 32.9    0.171  63
## 766        5     121       72      23     112 26.2    0.245  30
## 
## $usekernel
## [1] FALSE
## 
## $varnames
## [1] "pregnant" "glucose"  "pressure" "triceps"  "insulin"  "mass"    
## [7] "pedigree" "age"     
## 
## attr(,"class")
## [1] "NaiveBayes"
# Make predictions
prediction.naive <- model.naive %>% predict(test.pima.indians)
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 33
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 34
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 35
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 36
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 37
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 38
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 39
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 40
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 41
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 42
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 43
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 44
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 46
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 47
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 48
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 49
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 50
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 51
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 52
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 53
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 54
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 55
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 56
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 57
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 58
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 59
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 60
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 61
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 62
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 63
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 64
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 65
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 66
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 67
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 68
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 69
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 70
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 71
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 72
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 74
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 75
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 76
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 77
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 78
# Model accuracy
mean(prediction.naive$class == test.pima.indians$diabetes)
## [1] 0.8205128

Using caret R package

The caet R package can automatically train the model and assess the modle accuracy using k-fold cross-validation

library(klaR)

set.seed(123)

model.naive2 <- train(diabetes~., data = train.pima.indians, method = "nb", trControl = trainControl("cv", number = 10))
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
# make predictions
predicted.classes.naive2 <- model.naive2 %>% predict(test.pima.indians)
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 1
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 2
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 3
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 7
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 10
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 11
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 12
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 13
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 15
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 17
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 20
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 21
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 23
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 25
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 26
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 33
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 34
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 35
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 36
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 37
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 38
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 39
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 40
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 41
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 42
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 43
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 44
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 46
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 47
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 48
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 49
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 50
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 51
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 52
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 53
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 54
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 55
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 56
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 57
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 58
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 59
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 60
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 61
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 62
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 63
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 64
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 65
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 66
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 67
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 68
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 69
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 70
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 71
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 72
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 74
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 75
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 76
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 77
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 78
# Model n accuracy

mean(predicted.classes.naive2 == test.pima.indians$diabetes)
## [1] 0.8076923

Support Vector Machine

Support Vector Machine (or SVM) is a machine learning technique used for classification tasks. Briefly, SVM works by identifying the optimal decision boundary that separates data points from different groups (or classes), and then predicts the class of new observations based on the separation boundary.

Depending on the situations, the different groups might be separable by a linear straight line or by a non-linear boundary line.

Support vector machine methods can handle both linear and non-linear class boundaries. It can be used for both two-classs and multi-class classification problems.

In real life data, the separation boundary is generally nonlinear. Technically, the SVM algorithm perform a non-linear classification using what is called the kernel trick. The most commonly used kernel transformations ar e_polynomial kernel_ and radial kernel

Note that, there is also an extension of the SVM for regression, called support vector regression.

Loading required R packages

library(tidyverse)
library(caret)

Dataset

Data set: PimaIndiansDiabetes2 [in mlbench package]

# Load the data
data("PimaIndiansDiabetes2", package = "mlbench")
pima.data.cleaned <- na.omit(PimaIndiansDiabetes2)

# Split the data into training and test set
set.seed(123)

t.sample <- pima.data.cleaned$diabetes %>% createDataPartition(p = 0.8, list = FALSE)

SVM linear classifier

In the following example variables are normalized to make their scale comparable. This is automatically done before building the SVM classifier by setting the option preProcess = c(“center”, “scale”).

# Fit the model on the training set
set.seed(123)

model.svm <- train(
  diabetes~., data = train.pima.indians, method = "svmLinear",
  trContro = trainControl("cv", number = 10),
  preProcess = c("center", "scale")
)

# Make predictions on the test data
predicted.classes.svm <- model.svm %>% predict(test.pima.indians)
head(predicted.classes.svm)
## [1] neg pos neg pos pos neg
## Levels: neg pos
# Computed model accuracy rate
mean(predicted.classes.svm == test.pima.indians$diabetes)
## [1] 0.7820513

Note that, there is a tuning parameter C, also known as Cost, that determines the possible misclassifications. It essentially imposes a penalty to the model for making an error. The higher the value of C, the less likely it is that the SVM algorithm will misclassify a point.

By default caret builds the SVM linear classifier usin g C= 1. You can check this by typing model in R console

model.svm
## Support Vector Machines with Linear Kernel 
## 
## 314 samples
##   8 predictor
##   2 classes: 'neg', 'pos' 
## 
## Pre-processing: centered (8), scaled (8) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 314, 314, 314, 314, 314, 314, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.7862095  0.4893315
## 
## Tuning parameter 'C' was held constant at a value of 1

The following R code compute SVM for a grid values of C and choose automatically the final model for predictions:

# Fit the model on the training set
set.seed(123)

model.svm2 <- train(
  diabetes ~., data = train.pima.indians, method = "svmLinear",
  trControl = trainControl("cv", number = 10),
  tuneGrid = expand.grid(C = seq(0.1, 2, length = 20)),
  preProcess = c("center", "scale")
)

# Plot model accuracy vs different values of Cost
plot(model.svm2)

# Print the best tuning parameter C that maximizes model accuracy
model.svm2$bestTune

Let’s use the best model

# Make predictions on the test data 
predicted.classess.svm2 <- model.svm2 %>% predict(test.pima.indians)

# Compute model accuracy rate
mean(predicted.classess.svm2 == test.pima.indians$diabetes)
## [1] 0.7820513

SVM classifier using Non-Linear Kernel

To build a non-linear SVM classifier, we can use either polynomial kernel or radial kernel function.

  • Computing SVM using radial basis kernel
# Fit the model on the training set
set.seed(123)
model.svm.radial <- train(
  diabetes~., data = train.pima.indians, method = "svmRadial",
  trControl = trainControl("cv", number = 10),
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Print the best tuning parameter sigma and C that maximizes model accuracy

model.svm.radial$bestTune
# Make predictions on the test data
predicted.classess.svm.radial <- model.svm.radial %>% predict(test.pima.indians)
# Compute model Accuracy rate
mean(predicted.classess.svm.radial == test.pima.indians$diabetes)
## [1] 0.7948718
  • Computing SVM using polynomial basis kernel:
# Fit the model on the training set
set.seed(123)
model.svm.polynomial <- train(
  diabetes~., data = train.pima.indians, method = "svmPoly",
  trControl = trainControl("cv", number = 10),
  preProcess = c("center", "scale"),
  tuneLength = 4
)

# Print the best tuning parameter sigma and C that maximizes model accuracy

model.svm.polynomial$bestTune
# Make predictions on the test data
predicted.classess.svm.polynomial <- model.svm.polynomial %>% predict(test.pima.indians)
# Compute model Accuracy rate
mean(predicted.classess.svm.polynomial == test.pima.indians$diabetes)
## [1] 0.7948718

In our examples, it can be seen that the SVM classifier using non-linear kernel gives a better result compared to the linear model.

Classification Model Evaluation

After building a predictive classification model, you need to evaluate the performance of the model, that is how good the model is in predicting the outcome of new observations test data that have been not used to train the model.

The common use metrics and methods for assessing the performance of predictive classification models:

  • Average classification accuracy - representing the proportion of correctly classified observations.

  • Confusion matrix - which is 2x2 table showing four parameters, including the number of true positives, true negatives, false negatives and false positives.

  • Precision, Recall and Specificity - which are three major performance metrics describing a predictive classification model

  • ROC curve - which is a graphical summary of the overall performance of the model, showing the proportion of true positives and false positives at tall possible values of probability cutoff. The Area Under the Curve(AUC) summarizes the overall performance of the classifier.

Required R packages

  • tidyverse for easy data manipulation and visualization
  • caret for easy machine learning workflow
library(tidyverse)
library(caret)

Building a classification model

To keep things simple we’ll perform a binary classification, where the outcome variable can have only two possible values: negative vs positive.

We’ll compute an example of linear discriminant analysis model using the PimaIndiansDiabetes2 [mlbench package]

  1. Split the data into training (80%, used to build the model) and test set (20%, used to evaluate the model performance):
data("PimaIndiansDiabetes2", package = "mlbench")
pima.data.cleaned <- na.omit(PimaIndiansDiabetes2)
# Split the data into training and test set
set.seed(123)

t.sample <- pima.data.cleaned$diabetes %>% createDataPartition(p = 0.8, list = FALSE)

train.pima2 <- pima.data.cleaned[t.sample, ]
test.pima2 <- pima.data.cleaned[-t.sample, ]
  1. Fit the LDA model on the training set and make preditions on the test data:
library(MASS)
#Fit LDA

fit.lda <- lda(diabetes ~., data = train.pima2)

# Make predictions on the test data
predictions.pima2 <- predict(fit.lda, test.pima2)
preictions.probabilities2 <- predictions.pima2$posterior[,2]
predicted.classess2 <- predictions.pima2$class
observed.classes2 <- test.pima2$diabetes

Overall classification accuracy

The overall classification accuracy rate corresponds to the proportion of observations that have been correctly classified. Determining the raw classification accuracy is the first step in assessing the performance of a model.

accuracy <- mean(observed.classes2 == predicted.classess2)
accuracy
## [1] 0.8076923
error <- mean(observed.classes2 != predicted.classess2)
error
## [1] 0.1923077

From the output avove, the linear discrimant analysis correctly predicted the individual outcome in 81% of the cases. This is by far better than random guessing. The misclassification error rate can be calculated as 100-81 = 19%

In our example, a binary classifier can make two types of errors:

  • it can incorrectly assign an individual who is diabetes-positive to the diabetes-negative category
  • it can incorrectly assign an individual who is diabetes-negative to the diabetes-positive category.

The proportion of these two types of errors can be determined by creating a confution matrxi, which compare the predicted coutcome values agains the known outcome vaues.

Confusion matrix

The R function table() can be used to produce a confusion matrix in order to determin how many observations were correctly or incorrectly classified. It compares the observed and the predicted outcome values and shows the number of correct and incorrect predictions categorized by type of outcome.

# Confusion matrix, number of cases
table(observed.classes2, predicted.classess2)
##                  predicted.classess2
## observed.classes2 neg pos
##               neg  48   4
##               pos  11  15
# Confusion matrix, proportion of cases
table(observed.classes2, predicted.classess2) %>% prop.table() %>% round(digits = 3)
##                  predicted.classess2
## observed.classes2   neg   pos
##               neg 0.615 0.051
##               pos 0.141 0.192

The diagonal elements of the confusion matrix indicate correct predictions, while the off diagonals represent incorrect prediction. So, the correct classification rate is the sum of the number on the diagonal divided by the sample size in the test data. In our example, that is (48+15)/78 = 81%

  • True positives (d): these are cases in which we predicted the individuals would be diabetes-positive and they were .
  • True negatives (a): We predicted diabetes-negative, and the individuals were diabetes-negative
  • False positives (b): We predicted diabetes-positive, but the individuals didn’t actually hae diabetes. (Alsow known as a Type I error.)
  • False negatives (c): We predicted diabetes-negative, but they did have diabetes. (Also known as a Type II error.)

Technically the raw prediction accuracy of the model is defined as (TruePositives + TrueNegatives)/SampleSize.

Precision, Recall and Specificity

In addition to the raw classification accuracy, there are many other metrics that are widely used to examine the performance of a classification model, including:

Precision, which is the proportion of true positives among all the individuals that have been predicted to be diabetes-positive by the model. This represents the accuracy of a predicted positive outcome. Precision = TruePositives/(TruePositives + FalsePOsitives)

Sensitivity (or Recall), which is the True Positive Rate (TPR) or the proportion of identified positives among the diabetes-positive population (class = 1). Sensitivity = TruePOsitives/(TruePositives + FalseNegatives).

Specificity, which measures the True Negative Rate (TNR), that is the proportion of identified negatives among the diabetes-negative population (class = 0). Specificity = TrueNegatives/(TrueNegatives + FalseNegatives).

False POsitive Rate (FPR), which represents the proportion of identified positives among the healthy individuals (i.e. diabetes-negative). This can be seen as a false alarm. The FPR can be also calculated as 1-specificity. When positives are rare, the FPR can be high, leading to the situation where a predicted positive is most likely a negative.

confusionMatrix(observed.classes2, predicted.classess2, positive = "pos")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction neg pos
##        neg  48   4
##        pos  11  15
##                                           
##                Accuracy : 0.8077          
##                  95% CI : (0.7027, 0.8882)
##     No Information Rate : 0.7564          
##     P-Value [Acc > NIR] : 0.1787          
##                                           
##                   Kappa : 0.5361          
##                                           
##  Mcnemar's Test P-Value : 0.1213          
##                                           
##             Sensitivity : 0.7895          
##             Specificity : 0.8136          
##          Pos Pred Value : 0.5769          
##          Neg Pred Value : 0.9231          
##              Prevalence : 0.2436          
##          Detection Rate : 0.1923          
##    Detection Prevalence : 0.3333          
##       Balanced Accuracy : 0.8015          
##                                           
##        'Positive' Class : pos             
## 

In our example, the sensitivity is ~58%, that is the proportion of diabetes-positive individuals that were correctly identified by the model as diabetes-positive. The specificity of the model is ~92%, that is the proportion of diabetes-negative individuals that were correctly identified by the model as diabetes-negative. The model precision or the proportion of positive predicted value is 79%.

ROC curve

The ROC curve (or receiver operating characteristics curve) is apopular graphical measure for assessing the performance or the accuracy of a classifier, which corresponds to the total

Computing and plotting ROC curve

The ROC analysis can be easily performed using the R package pROC

library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:glmnet':
## 
##     auc
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
res.roc <- roc(observed.classes2,preictions.probabilities2)
## Setting levels: control = neg, case = pos
## Setting direction: controls < cases
plot.roc(res.roc, print.auc = TRUE)

# Extract some interesting results
roc.data <- data_frame(
  thresholds = res.roc$thresholds,
  sensitivity = res.roc$sensitivities,
  specificity = res.roc$specificities
)
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
# Get the probability threshold for specificity = 0.6

roc.data %>% filter(specificity >= 0.6)
plot.roc(res.roc, print.auc = TRUE, print.thres = "best")

plot.roc(res.roc, print.thres = c(0.3,0.5,0.7))